GitHub Actions committed on
Commit ·
1e83921
1
Parent(s): 4a7ca9a
🚀 Auto-deploy from GitHub Actions
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- README.md +5 -5
- hf_space/README.md +157 -323
- hf_space/hf_space/hf_space/hf_space/hf_space/Makefile +3 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +66 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +547 -17
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -3
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/static.yml +37 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +5 -12
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +13 -9
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +35 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +5 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +7 -27
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +2 -2
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +34 -18
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE +10 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile +85 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +178 -4
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -4
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +191 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +328 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +7 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +37 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +2 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +7 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py +17 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml +2 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py +32 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +29 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py +29 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +30 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +30 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py +29 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +53 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py +5 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +3 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py +52 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +188 -14
README.md
CHANGED
|
@@ -13,11 +13,11 @@ python_version: 3.11
|
|
| 13 |
|
| 14 |
# OCR Projet 05 — Prédiction d'attrition
|
| 15 |
|
| 16 |
-

|
| 17 |
-

|
| 18 |
-
[![project_license]
|
| 19 |
-
![MkDocs]
|
| 20 |
-
](https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml)
|
| 17 |
+
[](https://github.com/stephmnt/OCR_Projet05/releases)
|
| 18 |
+
[](https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE)
|
| 19 |
+
[](https://stephmnt.github.io/OCR_Projet05/)
|
| 20 |
+
[](https://cookiecutter-data-science.drivendata.org/)
|
| 21 |
|
| 22 |
Ce dépôt contient le projet OCR_Projet05. Il s'agit d'une application Gradio déployable sur Hugging Face Spaces, alimentée par un pipeline de préparation de données, un entraînement automatique et des services d'inférence orientés RH (prédiction de départ d'employés).
|
| 23 |
|
hf_space/README.md
CHANGED
|
@@ -11,395 +11,229 @@ short_description: Projet 05 formation Openclassrooms
|
|
| 11 |
python_version: 3.11
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
|
|
|
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
âââ LICENSE <- Open-source license if one is chosen
|
| 27 |
-
âââ Makefile <- Makefile with convenience commands like `make data` or `make train`
|
| 28 |
-
âââ README.md <- The top-level README for developers using this project.
|
| 29 |
-
âââ data
|
| 30 |
-
â âââ external <- Data from third party sources.
|
| 31 |
-
â âââ interim <- Intermediate data that has been transformed.
|
| 32 |
-
â âââ processed <- The final, canonical data sets for modeling.
|
| 33 |
-
â âââ raw <- The original, immutable data dump.
|
| 34 |
-
â
|
| 35 |
-
âââ docs <- A default mkdocs project; see www.mkdocs.org for details
|
| 36 |
-
â
|
| 37 |
-
âââ models <- Trained and serialized models, model predictions, or model summaries
|
| 38 |
-
â
|
| 39 |
-
âââ notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
|
| 40 |
-
â the creator's initials, and a short `-` delimited description, e.g.
|
| 41 |
-
â `1.0-jqp-initial-data-exploration`.
|
| 42 |
-
â
|
| 43 |
-
âââ pyproject.toml <- Project configuration file with package metadata for
|
| 44 |
-
â projet_05 and configuration for tools like black
|
| 45 |
-
â
|
| 46 |
-
âââ references <- Data dictionaries, manuals, and all other explanatory materials.
|
| 47 |
-
â
|
| 48 |
-
âââ reports <- Generated analysis as HTML, PDF, LaTeX, etc.
|
| 49 |
-
â âââ figures <- Generated graphics and figures to be used in reporting
|
| 50 |
-
â
|
| 51 |
-
âââ requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
| 52 |
-
â generated with `pip freeze > requirements.txt`
|
| 53 |
-
â
|
| 54 |
-
âââ setup.cfg <- Configuration file for flake8
|
| 55 |
-
â
|
| 56 |
-
âââ projet_05 <- Source code for use in this project.
|
| 57 |
-
â
|
| 58 |
-
âââ __init__.py <- Makes projet_05 a Python module
|
| 59 |
-
â
|
| 60 |
-
âââ config.py <- Store useful variables and configuration
|
| 61 |
-
â
|
| 62 |
-
âââ dataset.py <- Scripts to download or generate data
|
| 63 |
-
â
|
| 64 |
-
âââ features.py <- Code to create features for modeling
|
| 65 |
-
â
|
| 66 |
-
âââ modeling
|
| 67 |
-
â âââ __init__.py
|
| 68 |
-
â âââ predict.py <- Code to run model inference with trained models
|
| 69 |
-
â âââ train.py <- Code to train models
|
| 70 |
-
â
|
| 71 |
-
âââ plots.py <- Code to create visualizations
|
| 72 |
-
```
|
| 73 |
-
|
| 74 |
-
## Code hérité réutilisé
|
| 75 |
-
|
| 76 |
-
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 77 |
-
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 78 |
-
|
| 79 |
-
## Base de données PostgreSQL
|
| 80 |
-
|
| 81 |
-
Depuis la branche `postgresql`, toute la fusion des fichiers bruts repose sur une base PostgreSQL accessible via SQLAlchemy.
|
| 82 |
-
|
| 83 |
-
1. Installez PostgreSQL (Homebrew, package officiel, etc.).
|
| 84 |
-
2. Créez un rÎle et la base attendue :
|
| 85 |
-
|
| 86 |
-
> Exemple pour MacOS
|
| 87 |
-
|
| 88 |
-
```bash
|
| 89 |
-
/opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
|
| 90 |
-
/opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
|
| 91 |
-
/opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
|
| 92 |
-
```
|
| 93 |
-
|
| 94 |
-
Adaptez les chemins/versions selon votre environnement.
|
| 95 |
-
3. Renseignez la chaĂźne de connexion dans `projet_05/settings.yml` :
|
| 96 |
-
|
| 97 |
-
```yaml
|
| 98 |
-
database:
|
| 99 |
-
url: postgresql+psycopg://user:password@host:5432/projet05
|
| 100 |
-
schema: public
|
| 101 |
-
```
|
| 102 |
-
|
| 103 |
-
Il est également possible de définir `PROJET05_DATABASE_URL` dans l'environnement.
|
| 104 |
-
|
| 105 |
-
4. Initialisez la base (création des tables + insertion des CSV d'exemple) avec :
|
| 106 |
-
|
| 107 |
-
```bash
|
| 108 |
-
python -m scripts.init_db
|
| 109 |
-
```
|
| 110 |
-
|
| 111 |
-
5. Assurez-vous que l'utilisateur possÚde les droits `CREATE/DROP TABLE` dans le schéma ciblé : les tables `sirh`, `evaluation`, `sond` ainsi que `prediction_logs` seront créées ou recréées à chaque ré-exécution.
|
| 112 |
-
|
| 113 |
-
6. Lancez ensuite `python -m projet_05.dataset` comme auparavant (ou `python main.py` pour exĂ©cuter toutes les Ă©tapes). La requĂȘte SQL utilisĂ©e est toujours exportĂ©e dans `reports/merge_sql.sql` pour audit.
|
| 114 |
-
|
| 115 |
-
> Les interactions utilisateur/modÚle (qu'elles proviennent du formulaire, du tableau ou d'un upload) sont automatiquement journalisées dans la table `prediction_logs`, ce qui permet de tracer les usages et de constituer un dataset réel pour le monitoring.
|
| 116 |
-
|
| 117 |
-
## Tests & couverture
|
| 118 |
-
|
| 119 |
-
Une batterie de tests Pytest valide l'intégrité de la base PostgreSQL, la fusion des données et la journalisation des prédictions.
|
| 120 |
-
|
| 121 |
-
1. Démarrez PostgreSQL (cf. section précédente) et créez un utilisateur ayant les droits `CREATE/DROP DATABASE`.
|
| 122 |
-
2. Facultatif : définissez `PROJET05_TEST_DATABASE_URL` si vous souhaitez utiliser une URL différente de `postgresql+psycopg://postgres:postgres@localhost:5432/projet05_test`.
|
| 123 |
-
3. Exécutez les tests et générez le rapport de couverture :
|
| 124 |
-
|
| 125 |
-
```bash
|
| 126 |
-
pytest
|
| 127 |
-
```
|
| 128 |
-
|
| 129 |
-
La configuration Pytest produit Ă la fois un rapport terminal (`--cov-report=term-missing`) et un fichier `coverage.xml` exploitable par vos outils CI/CD.
|
| 130 |
-
Les sorties complÚtes sont sauvegardées dans `logs/tests_logs/<timestamp>.log`.
|
| 131 |
-
|
| 132 |
-
Les tests vérifient notamment :
|
| 133 |
-
|
| 134 |
-
- la création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` et la cohérence du nombre de lignes insérées ;
|
| 135 |
-
- lâintĂ©gritĂ© du DataFrame fusionnĂ© (typage, absence de valeurs nulles sur la clĂ© primaire, cohĂ©rence de la cible) ;
|
| 136 |
-
- la robustesse du script de log des prĂ©dictions (insertion dâentrĂ©es dans `prediction_logs` et nettoyage) ;
|
| 137 |
-
- la génération des logs de pipeline, regroupés dans `logs/pipeline_logs/<timestamp>.log`.
|
| 138 |
-
|
| 139 |
-
--------
|
| 140 |
|
| 141 |
-
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
***
|
| 147 |
-
*** that would make this better, please fork the repo and create a pull request
|
| 148 |
-
*** or simply open an issue with the tag "enhancement".
|
| 149 |
-
*** Don't forget to give the project a star!
|
| 150 |
-
*** Thanks again! Now go create something AMAZING! :D
|
| 151 |
-
-->
|
| 152 |
|
| 153 |
-
|
| 154 |
-
<!--
|
| 155 |
-
*** I'm using markdown "reference style" links for readability.
|
| 156 |
-
*** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
|
| 157 |
-
*** See the bottom of this document for the declaration of the reference variables
|
| 158 |
-
*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
|
| 159 |
-
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 160 |
-
-->
|
| 161 |
-
[![Contributors][contributors-shield]][contributors-url]
|
| 162 |
-
[![Python][python]][python]
|
| 163 |
-
[![Forks][forks-shield]][forks-url]
|
| 164 |
-
[![Stargazers][stars-shield]][stars-url]
|
| 165 |
-
[![Issues][issues-shield]][issues-url]
|
| 166 |
-
[![project_license][license-shield]][license-url]
|
| 167 |
-
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 168 |
-

|
| 169 |
-
|
| 170 |
-
<!-- PROJECT LOGO -->
|
| 171 |
-
<br />
|
| 172 |
-
<div align="center">
|
| 173 |
-
<a href="https://github.com/github_username/repo_name">
|
| 174 |
-
<img src="images/logo.png" alt="Logo" width="80" height="80">
|
| 175 |
-
</a>
|
| 176 |
-
|
| 177 |
-
<h3 align="center">project_title</h3>
|
| 178 |
-
|
| 179 |
-
<p align="center">
|
| 180 |
-
project_description
|
| 181 |
-
<br />
|
| 182 |
-
<a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
|
| 183 |
-
<br />
|
| 184 |
-
<br />
|
| 185 |
-
<a href="https://github.com/github_username/repo_name">View Demo</a>
|
| 186 |
-
·
|
| 187 |
-
<a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
|
| 188 |
-
·
|
| 189 |
-
<a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
|
| 190 |
-
</p>
|
| 191 |
-
</div>
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
</li>
|
| 203 |
-
<li>
|
| 204 |
-
<a href="#getting-started">Getting Started</a>
|
| 205 |
-
<ul>
|
| 206 |
-
<li><a href="#prerequisites">Prerequisites</a></li>
|
| 207 |
-
<li><a href="#installation">Installation</a></li>
|
| 208 |
-
</ul>
|
| 209 |
-
</li>
|
| 210 |
-
<li><a href="#usage">Usage</a></li>
|
| 211 |
-
<li><a href="#roadmap">Roadmap</a></li>
|
| 212 |
-
<li><a href="#contributing">Contributing</a></li>
|
| 213 |
-
<li><a href="#license">License</a></li>
|
| 214 |
-
<li><a href="#contact">Contact</a></li>
|
| 215 |
-
<li><a href="#acknowledgments">Acknowledgments</a></li>
|
| 216 |
-
</ol>
|
| 217 |
-
</details>
|
| 218 |
|
|
|
|
| 219 |
|
|
|
|
| 220 |
|
| 221 |
-
|
| 222 |
-
## About The Project
|
| 223 |
|
| 224 |
-
|
|
|
|
|
|
|
| 225 |
|
| 226 |
-
|
| 227 |
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
|
|
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
|
| 233 |
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
-
|
| 238 |
|
| 239 |
-
|
| 240 |
-
## Getting Started
|
| 241 |
|
| 242 |
-
|
| 243 |
-
To get a local copy up and running follow these simple example steps.
|
| 244 |
|
| 245 |
-
###
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
npm install npm@latest -g
|
| 251 |
-
```
|
| 252 |
|
| 253 |
-
|
| 254 |
|
| 255 |
-
|
| 256 |
-
uvicorn app.main:app --reload
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
```
|
| 262 |
-
2. Install NPM packages
|
| 263 |
-
```sh
|
| 264 |
-
npm install
|
| 265 |
-
```
|
| 266 |
-
3. Enter your API in `config.js`
|
| 267 |
-
```js
|
| 268 |
-
const API_KEY = 'ENTER YOUR API';
|
| 269 |
-
```
|
| 270 |
-
4. Change git remote url to avoid accidental pushes to base project
|
| 271 |
-
```sh
|
| 272 |
-
git remote set-url origin github_username/repo_name
|
| 273 |
-
git remote -v # confirm the changes
|
| 274 |
-
```
|
| 275 |
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
|
|
|
| 278 |
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
|
| 281 |
-
## Usage
|
| 282 |
|
| 283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
-
|
| 286 |
|
| 287 |
-
|
| 288 |
|
|
|
|
| 289 |
|
|
|
|
| 290 |
|
| 291 |
-
|
| 292 |
-
## Roadmap
|
| 293 |
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
|
| 299 |
-
|
| 300 |
|
| 301 |
-
|
|
|
|
| 302 |
|
|
|
|
| 303 |
|
|
|
|
| 304 |
|
| 305 |
-
|
| 306 |
-
## Contributing
|
| 307 |
|
| 308 |
-
|
|
|
|
|
|
|
| 309 |
|
| 310 |
-
|
| 311 |
-
Don't forget to give the project a star! Thanks again!
|
| 312 |
|
| 313 |
-
|
| 314 |
-
2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
|
| 315 |
-
3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
|
| 316 |
-
4. Push to the Branch (`git push origin feature/AmazingFeature`)
|
| 317 |
-
5. Open a Pull Request
|
| 318 |
|
| 319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
|
| 321 |
-
|
| 322 |
|
| 323 |
-
|
| 324 |
-
<img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
|
| 325 |
-
</a>
|
| 326 |
|
|
|
|
|
|
|
|
|
|
| 327 |
|
|
|
|
| 328 |
|
| 329 |
-
|
| 330 |
-
## License
|
| 331 |
|
| 332 |
-
|
| 333 |
|
| 334 |
-
|
|
|
|
|
|
|
| 335 |
|
|
|
|
|
|
|
| 336 |
|
|
|
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
|
| 341 |
-
|
| 342 |
|
| 343 |
-
|
| 344 |
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
|
|
|
| 347 |
|
|
|
|
| 348 |
|
| 349 |
-
|
| 350 |
-
## Acknowledgments
|
| 351 |
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
|
|
|
| 355 |
|
| 356 |
-
|
| 357 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
|
|
|
| 359 |
|
| 360 |
-
|
| 361 |
-
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 362 |
-
[contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 363 |
-
[contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
|
| 364 |
-
[forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 365 |
-
[forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
|
| 366 |
-
[stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 367 |
-
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 368 |
-
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 369 |
-
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
| 370 |
-
[product-screenshot]: images/screenshot.png
|
| 371 |
-
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 372 |
-
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
| 373 |
-
[Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
|
| 374 |
-
[Next-url]: https://nextjs.org/
|
| 375 |
-
[React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
|
| 376 |
-
[React-url]: https://reactjs.org/
|
| 377 |
-
[Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
|
| 378 |
-
[Vue-url]: https://vuejs.org/
|
| 379 |
-
[Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
|
| 380 |
-
[Angular-url]: https://angular.io/
|
| 381 |
-
[Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
|
| 382 |
-
[Svelte-url]: https://svelte.dev/
|
| 383 |
-
[Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
|
| 384 |
-
[Laravel-url]: https://laravel.com
|
| 385 |
-
[Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
|
| 386 |
-
[Bootstrap-url]: https://getbootstrap.com
|
| 387 |
-
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 388 |
-
[JQuery-url]: https://jquery.com
|
| 389 |
-
<!-- OK -->
|
| 390 |
-
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 391 |
-
[license-url]: https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE
|
| 392 |
-
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 393 |
-
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 394 |
-
<!-- TODO: -->
|
| 395 |
-
[postgres-shield]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
|
| 396 |
-
[python-shield]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
|
| 397 |
-
[mkdocs-shield]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
|
| 398 |
-
[mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
|
| 399 |
-
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 400 |
-
[](#)
|
| 401 |
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-

|
|
|
|
| 11 |
python_version: 3.11
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# OCR Projet 05 — Prédiction d'attrition
|
| 15 |
|
| 16 |
+
[](https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml)
|
| 17 |
+
[](https://github.com/stephmnt/OCR_Projet05/releases)
|
| 18 |
+
[](https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE)
|
| 19 |
+
[](https://stephmnt.github.io/OCR_Projet05/)
|
| 20 |
+
[](https://cookiecutter-data-science.drivendata.org/)
|
| 21 |
|
| 22 |
+
Ce dépôt contient le projet OCR_Projet05. Il s'agit d'une application Gradio déployable sur Hugging Face Spaces, alimentée par un pipeline de préparation de données, un entraînement automatique et des services d'inférence orientés RH (prédiction de départ d'employés).
|
| 23 |
|
| 24 |
+
Ce document décrit :
|
| 25 |
|
| 26 |
+
- la **présentation fonctionnelle** ;
|
| 27 |
+
- les **instructions d'installation, d'utilisation et de déploiement** (local + Hugging Face) ;
|
| 28 |
+
- le **processus de stockage/gestion des données** (PostgreSQL + journaux) ;
|
| 29 |
+
- les **besoins analytiques** (tableaux de bord, métriques clés).
|
| 30 |
|
| 31 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
## 1. Vue d'ensemble du projet
|
| 34 |
|
| 35 |
+
- **Objectif métier** : détecter les employés à risque de départ en exploitant 3 sources brutes (SIRH, évaluation, sondage interne).
|
| 36 |
+
- **Technologie** : pipeline Python (Typer, pandas, scikit-learn, SQLAlchemy) + application Gradio (`app.py`) déployée sur Hugging Face.
|
| 37 |
+
- **ModĂšle** : pipeline scikit-learn (prĂ©traitement + classifieur) sĂ©rialisĂ© dans `models/best_model.joblib`, paramĂ©trĂ© avec un seuil de dĂ©cision optimisĂ© (visible dans lâUI).
|
| 38 |
+
- **Journaux** : deux sous-dossiers `logs/pipeline_logs` et `logs/tests_logs` contiennent respectivement les traces du pipeline `main.py` et les sorties Pytest.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
Arborescence clé :
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
```
|
| 43 |
+
âââ projet_05/ # Package Python principal
|
| 44 |
+
âââ app.py # Interface Gradio (dĂ©ploiement HF)
|
| 45 |
+
âââ scripts/init_db.py # CrĂ©ation/initialisation PostgreSQL
|
| 46 |
+
âââ main.py # Orchestrateur du pipeline local
|
| 47 |
+
âââ docs/ # Documentation MkDocs + tests.md
|
| 48 |
+
âââ tests/ # Suite Pytest (DB + intĂ©gration)
|
| 49 |
+
âââ requirements.txt # DĂ©pendances runtime (HF)
|
| 50 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
---
|
| 53 |
|
| 54 |
+
## 2. Installation locale
|
| 55 |
|
| 56 |
+
### 2.1. Prérequis
|
|
|
|
| 57 |
|
| 58 |
+
1. Python 3.11 (virtualenv ou Poetry recommandé).
|
| 59 |
+
2. PostgreSQL (>= 17) accessible localement (cf. instructions DB plus bas).
|
| 60 |
+
3. Outils optionnels : `make`, `pip`, `pytest`.
|
| 61 |
|
| 62 |
+
### 2.2. Étapes
|
| 63 |
|
| 64 |
+
```bash
|
| 65 |
+
git clone https://github.com/stephmnt/OCR_Projet05.git
|
| 66 |
+
cd OCR_Projet05
|
| 67 |
+
python -m venv .venv && source .venv/bin/activate
|
| 68 |
+
pip install -r requirements.txt # pour HF
|
| 69 |
+
pip install -e . # pour le développement local (pyproject)
|
| 70 |
+
```
|
| 71 |
|
| 72 |
+
### 2.3. Configuration PostgreSQL
|
| 73 |
|
| 74 |
+
```bash
|
| 75 |
+
/opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
|
| 76 |
+
/opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
|
| 77 |
+
/opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
|
| 78 |
+
```
|
| 79 |
|
| 80 |
+
Puis dans `projet_05/settings.yml` :
|
| 81 |
|
| 82 |
+
```yaml
|
| 83 |
+
database:
|
| 84 |
+
url: postgresql+psycopg://postgres:postgres@localhost:5432/projet05
|
| 85 |
+
schema: public
|
| 86 |
+
```
|
| 87 |
|
| 88 |
+
> Sur une autre infrastructure, adaptez lâURL ou utilisez `PROJET05_DATABASE_URL`.
|
| 89 |
|
| 90 |
+
---
|
|
|
|
| 91 |
|
| 92 |
+
## 3. Utilisation du pipeline
|
|
|
|
| 93 |
|
| 94 |
+
### 3.1. Initialiser la base
|
| 95 |
|
| 96 |
+
```bash
|
| 97 |
+
python -m scripts.init_db
|
| 98 |
+
```
|
|
|
|
|
|
|
| 99 |
|
| 100 |
+
Création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` + insertion des CSV bruts situés dans `data/raw`.
|
| 101 |
|
| 102 |
+
### 3.2. Pipeline complet
|
|
|
|
| 103 |
|
| 104 |
+
```bash
|
| 105 |
+
python main.py
|
| 106 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
+
- `main.py` enchaĂźne :
|
| 109 |
+
1. Initialisation base PostgreSQL (`scripts.init_db`)
|
| 110 |
+
2. Préparation des données (`projet_05.dataset`)
|
| 111 |
+
3. Feature engineering (`projet_05.features`)
|
| 112 |
+
4. EntraĂźnement (`projet_05.modeling.train`)
|
| 113 |
+
- Les logs sont regroupés dans `logs/pipeline_logs/<timestamp>.log`.
|
| 114 |
|
| 115 |
+
### 3.3. Application Gradio locale
|
| 116 |
|
| 117 |
+
```bash
|
| 118 |
+
python app.py
|
| 119 |
+
```
|
| 120 |
|
| 121 |
+
L'interface propose :
|
|
|
|
| 122 |
|
| 123 |
+
- onglet **Formulaire** ;
|
| 124 |
+
- **Tableau interactif** ;
|
| 125 |
+
- **Upload CSV** ;
|
| 126 |
+
- **Fichiers non-mergés** (chargement des trois CSV bruts, fusion automatique).
|
| 127 |
+
Un appel à `_log_predictions` trace chaque prédiction dans PostgreSQL (`prediction_logs`).
|
| 128 |
|
| 129 |
+
---
|
| 130 |
|
| 131 |
+
## 4. Déploiement sur Hugging Face
|
| 132 |
|
| 133 |
+
### 4.1. Dépendances
|
| 134 |
|
| 135 |
+
`requirements.txt` contient toutes les bibliothĂšques nĂ©cessaires Ă la Space (Gradio, scikit-learn, pandas, SQLAlchemy, psycopgâŠ).
|
| 136 |
|
| 137 |
+
### 4.2. Ătapes
|
|
|
|
| 138 |
|
| 139 |
+
1. Créer une Space Gradio (Python 3.11).
|
| 140 |
+
2. Copier `app.py`, `requirements.txt`, `models/`, `data/processed/schema.json`.
|
| 141 |
+
3. Configurer les secrets HF (si besoin de variables dâenvironnement).
|
| 142 |
+
4. Optionnel : définir `HUGGINGFACEHUB_API_TOKEN` pour automatiser les déploiements via GitHub Actions.
|
| 143 |
|
| 144 |
+
### 4.3. Spécificités Space
|
| 145 |
|
| 146 |
+
- Hugging Face nâexpose pas PostgreSQL. Lâapplication Gradio bascule alors sur le mode **pandas fallback** (fusion locale) grĂące Ă la gestion dâerreur de `dataset.py`.
|
| 147 |
+
- Les journaux restants sont ceux gĂ©nĂ©rĂ©s par lâappli (pas dâĂ©criture dans `logs/` cĂŽtĂ© Space).
|
| 148 |
|
| 149 |
+
---
|
| 150 |
|
| 151 |
+
## 5. Processus de stockage & gestion des données
|
| 152 |
|
| 153 |
+
### 5.1. Sources
|
|
|
|
| 154 |
|
| 155 |
+
- `data/raw/extrait_sirh.csv`
|
| 156 |
+
- `data/raw/extrait_eval.csv`
|
| 157 |
+
- `data/raw/extrait_sondage.csv`
|
| 158 |
|
| 159 |
+
### 5.2. Base relationnelle
|
|
|
|
| 160 |
|
| 161 |
+
Tables PostgreSQL créées par `scripts.init_db` :
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
+
| Table | RÎle | Colonnes clés |
|
| 164 |
+
| --- | --- | --- |
|
| 165 |
+
| `sirh` | Profil RH structuré | `id_employee`, `age`, `revenu_mensuel`, `poste`, etc. |
|
| 166 |
+
| `evaluation` | Historique dâĂ©valuations | `id_employee`, `note_evaluation_actuelle`, `niveau_hierarchique_poste`, `satisfaction_*` |
|
| 167 |
+
| `sond` | Sondage + cible | `id_employee`, `a_quitte_l_entreprise`, `distance_domicile_travail`, `domaine_etude`, etc. |
|
| 168 |
+
| `prediction_logs` | Journal dâinfĂ©rence | `log_id`, `created_at`, `id_employee`, `source`, `probability`, `decision`, `payload` JSON |
|
| 169 |
|
| 170 |
+
`projet_05.dataset` fusionne `sirh â© evaluation â© sond` via SQL ; en cas dâindisponibilitĂ© DB, la fusion pandas est utilisĂ©e en repli.
|
| 171 |
|
| 172 |
+
### 5.3. Journaux et tracing
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
- `logs/pipeline_logs` : sorties `main.py`
|
| 175 |
+
- `logs/tests_logs` : sorties Pytest (`make test`)
|
| 176 |
+
- `prediction_logs` : base PostgreSQL, indispensable pour lâaudit des dĂ©cisions ML.
|
| 177 |
|
| 178 |
+
---
|
| 179 |
|
| 180 |
+
## 6. Tests et couverture
|
|
|
|
| 181 |
|
| 182 |
+
### 6.1. Exécution
|
| 183 |
|
| 184 |
+
```bash
|
| 185 |
+
pytest
|
| 186 |
+
```
|
| 187 |
|
| 188 |
+
- La fixture `initialized_db` crée une base `projet05_test`, lance `scripts.init_db`, puis la supprime.
|
| 189 |
+
- Les logs Pytest sont stockés dans `logs/tests_logs/<timestamp>.log`.
|
| 190 |
|
| 191 |
+
### 6.2. Couverture
|
| 192 |
|
| 193 |
+
- Rapports `term-missing` + `coverage.xml`.
|
| 194 |
+
- Zones non couvertes : `features.py`, `modeling/train.py`, `explainability.py` (Ă prioriser si besoin).
|
| 195 |
|
| 196 |
+
---
|
| 197 |
|
| 198 |
+
## 7. Besoins analytiques / tableaux de bord
|
| 199 |
|
| 200 |
+
- **Dashboard RH** basé sur les journaux `prediction_logs` :
|
| 201 |
+
- Volume de prédictions par source (Formulaire / CSV / Raw).
|
| 202 |
+
- Répartition des scores (`proba_depart`) / seuil de décision.
|
| 203 |
+
- Historique des décisions (tendance du taux de risque).
|
| 204 |
+
- Drill-down par attributs (`departement`, `poste`, `genre`âŠ).
|
| 205 |
+
- **Monitoring modĂšle** :
|
| 206 |
+
- Taux dâutilisation (logs journaliers).
|
| 207 |
+
- Drift potentiel : comparer les distributions des features avec `docs/` (notebooks dâanalyse) ou via un outil externe.
|
| 208 |
+
- **KPI Data/IT** :
|
| 209 |
+
- Latence dâinfĂ©rence (calculable via timestamps, si ajoutĂ©s).
|
| 210 |
+
- Suivi des erreurs (logs pipeline/tests).
|
| 211 |
|
| 212 |
+
---
|
| 213 |
|
| 214 |
+
## 8. Choix techniques et justification
|
| 215 |
|
| 216 |
+
Ce projet combine une interface Gradio, une base PostgreSQL et un pipeline CI/CD GitHub Actions. Les dĂ©cisions dâarchitecture dĂ©taillant le pourquoi/du comment (Gradio vs FastAPI, choix de PostgreSQL, automatisations) sont regroupĂ©es dans [`docs/docs/choix-techniques.md`](docs/docs/choix-techniques.md). Cette section sert de support de soutenance pour rappeler :
|
|
|
|
| 217 |
|
| 218 |
+
- pourquoi Gradio a été privilégié pour la démonstration Hugging Face ;
|
| 219 |
+
- comment PostgreSQL sécurise la fusion des trois sources et la journalisation ;
|
| 220 |
+
- en quoi les workflows GitHub Actions garantissent un déploiement fiable.
|
| 221 |
+
- comment les environnements sont configurés : `main.py` est exécuté en environnement **test** (base `projet05_test`, variables `PROJET05_TEST_DATABASE_URL`) pour valider le pipeline complet, tandis que `app.py` tourne en **production** (Space Hugging Face, variable `PROJET05_DATABASE_URL`/fallback pandas) afin de servir les utilisateurs finaux.
|
| 222 |
|
| 223 |
+
## 9. Instructions rapides
|
| 224 |
|
| 225 |
+
| Action | Commande |
|
| 226 |
+
| --- | --- |
|
| 227 |
+
| Init DB + pipeline complet | `python main.py` |
|
| 228 |
+
| Lancer Gradio local | `python app.py` |
|
| 229 |
+
| Initialiser la base seule | `python -m scripts.init_db` |
|
| 230 |
+
| Lancer les tests + logs | `make test` |
|
| 231 |
+
| Déployer sur Hugging Face | Pousser `app.py`, `requirements.txt`, `models/`, config Space |
|
| 232 |
|
| 233 |
+
---
|
| 234 |
|
| 235 |
+
## 10. Licence / références
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
+
Ce projet est fourni dans le cadre de la formation OpenClassrooms.
|
| 238 |
+
La documentation complémentaire est disponible dans `docs/` (MkDocs + `docs/docs/tests.md` pour les tests).
|
| 239 |
+
Pour toute question : [LinkedIn](https://linkedin.com/in/stephanemanet).
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/Makefile
CHANGED
|
@@ -43,7 +43,9 @@ format:
|
|
| 43 |
## Run tests
|
| 44 |
.PHONY: test
|
| 45 |
test:
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
## Set up Python interpreter environment
|
|
|
|
| 43 |
## Run tests
|
| 44 |
.PHONY: test
|
| 45 |
test:
|
| 46 |
+
@mkdir -p logs/tests_logs
|
| 47 |
+
@timestamp=$$(date +%Y%m%d_%H%M%S); \
|
| 48 |
+
pytest | tee logs/tests_logs/$$timestamp.log
|
| 49 |
|
| 50 |
|
| 51 |
## Set up Python interpreter environment
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -7,6 +7,7 @@ questions.md
|
|
| 7 |
/reports/
|
| 8 |
/data/
|
| 9 |
runtime.txt
|
|
|
|
| 10 |
|
| 11 |
# vim
|
| 12 |
*.swp
|
|
|
|
| 7 |
/reports/
|
| 8 |
/data/
|
| 9 |
runtime.txt
|
| 10 |
+
/logs/
|
| 11 |
|
| 12 |
# vim
|
| 13 |
*.swp
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -76,6 +76,66 @@ python_version: 3.11
|
|
| 76 |
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 77 |
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
--------
|
| 80 |
|
| 81 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
@@ -105,7 +165,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 105 |
[![Issues][issues-shield]][issues-url]
|
| 106 |
[![project_license][license-shield]][license-url]
|
| 107 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 108 |
-
](#)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 77 |
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 78 |
|
| 79 |
+
## Base de données PostgreSQL
|
| 80 |
+
|
| 81 |
+
Depuis la branche `postgresql`, toute la fusion des fichiers bruts repose sur une base PostgreSQL accessible via SQLAlchemy.
|
| 82 |
+
|
| 83 |
+
1. Installez PostgreSQL (Homebrew, package officiel, etc.).
|
| 84 |
+
2. Créez un rÎle et la base attendue :
|
| 85 |
+
|
| 86 |
+
> Exemple pour MacOS
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
/opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
|
| 90 |
+
/opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
|
| 91 |
+
/opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
Adaptez les chemins/versions selon votre environnement.
|
| 95 |
+
3. Renseignez la chaĂźne de connexion dans `projet_05/settings.yml` :
|
| 96 |
+
|
| 97 |
+
```yaml
|
| 98 |
+
database:
|
| 99 |
+
url: postgresql+psycopg://user:password@host:5432/projet05
|
| 100 |
+
schema: public
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
Il est également possible de définir `PROJET05_DATABASE_URL` dans l'environnement.
|
| 104 |
+
|
| 105 |
+
4. Initialisez la base (création des tables + insertion des CSV d'exemple) avec :
|
| 106 |
+
|
| 107 |
+
```bash
|
| 108 |
+
python -m scripts.init_db
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
5. Assurez-vous que l'utilisateur possÚde les droits `CREATE/DROP TABLE` dans le schéma ciblé : les tables `sirh`, `evaluation`, `sond` ainsi que `prediction_logs` seront créées ou recréées à chaque ré-exécution.
|
| 112 |
+
|
| 113 |
+
6. Lancez ensuite `python -m projet_05.dataset` comme auparavant (ou `python main.py` pour exĂ©cuter toutes les Ă©tapes). La requĂȘte SQL utilisĂ©e est toujours exportĂ©e dans `reports/merge_sql.sql` pour audit.
|
| 114 |
+
|
| 115 |
+
> Les interactions utilisateur/modÚle (qu'elles proviennent du formulaire, du tableau ou d'un upload) sont automatiquement journalisées dans la table `prediction_logs`, ce qui permet de tracer les usages et de constituer un dataset réel pour le monitoring.
|
| 116 |
+
|
| 117 |
+
## Tests & couverture
|
| 118 |
+
|
| 119 |
+
Une batterie de tests Pytest valident lâintĂ©gritĂ© de la base PostgreSQL, la fusion des donnĂ©es et la journalisation des prĂ©dictions.
|
| 120 |
+
|
| 121 |
+
1. Démarrez PostgreSQL (cf. section précédente) et créez un utilisateur ayant les droits `CREATE/DROP DATABASE`.
|
| 122 |
+
2. Facultatif : définissez `PROJET05_TEST_DATABASE_URL` si vous souhaitez utiliser une URL différente de `postgresql+psycopg://postgres:postgres@localhost:5432/projet05_test`.
|
| 123 |
+
3. Exécutez les tests et générez le rapport de couverture :
|
| 124 |
+
|
| 125 |
+
```bash
|
| 126 |
+
pytest
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
La configuration Pytest produit Ă la fois un rapport terminal (`--cov-report=term-missing`) et un fichier `coverage.xml` exploitable par vos outils CI/CD.
|
| 130 |
+
Les sorties complÚtes sont sauvegardées dans `logs/tests_logs/<timestamp>.log`.
|
| 131 |
+
|
| 132 |
+
Les tests vérifient notamment :
|
| 133 |
+
|
| 134 |
+
- la création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` et la cohérence du nombre de lignes insérées ;
|
| 135 |
+
- lâintĂ©gritĂ© du DataFrame fusionnĂ© (typage, absence de valeurs nulles sur la clĂ© primaire, cohĂ©rence de la cible) ;
|
| 136 |
+
- la robustesse du script de log des prĂ©dictions (insertion dâentrĂ©es dans `prediction_logs` et nettoyage) ;
|
| 137 |
+
- la génération des logs de pipeline, regroupés dans `logs/pipeline_logs/<timestamp>.log`.
|
| 138 |
+
|
| 139 |
--------
|
| 140 |
|
| 141 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 165 |
[![Issues][issues-shield]][issues-url]
|
| 166 |
[![project_license][license-shield]][license-url]
|
| 167 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 168 |
+

|
| 169 |
|
| 170 |
<!-- PROJECT LOGO -->
|
| 171 |
<br />
|
|
|
|
| 398 |
[mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
|
| 399 |
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 400 |
[](#)
|
| 401 |
+
|
| 402 |
+

|
| 403 |
+
[![https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff]][[mkdocs-url](https://stephmnt.github.io/OCR_Projet05/)]
|
| 404 |
+

|
| 405 |
+

|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -5,15 +5,265 @@ from pathlib import Path
|
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
import gradio as gr
|
|
|
|
| 8 |
import pandas as pd
|
| 9 |
from loguru import logger
|
|
|
|
|
|
|
| 10 |
|
| 11 |
from projet_05.branding import apply_brand_theme
|
| 12 |
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
|
|
|
|
|
|
|
| 13 |
|
| 14 |
MODEL_PATH = Path("models/best_model.joblib")
|
| 15 |
METADATA_PATH = Path("models/best_model_meta.json")
|
| 16 |
SCHEMA_PATH = Path("data/processed/schema.json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def _load_schema(path: Path) -> dict[str, Any]:
|
|
@@ -54,6 +304,28 @@ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
|
| 54 |
return []
|
| 55 |
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 58 |
"""Normalize any user input into a validated DataFrame.
|
| 59 |
|
|
@@ -79,6 +351,195 @@ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
|
| 79 |
return df
|
| 80 |
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
def _ensure_model():
|
| 83 |
"""Ensure that a pipeline has been loaded before inference."""
|
| 84 |
if PIPELINE is None:
|
|
@@ -90,46 +551,77 @@ def _ensure_model():
|
|
| 90 |
def score_table(table):
|
| 91 |
"""Score data entered via the interactive table."""
|
| 92 |
_ensure_model()
|
| 93 |
-
df = _convert_input(table,
|
|
|
|
|
|
|
| 94 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 95 |
-
|
| 96 |
df,
|
| 97 |
PIPELINE,
|
| 98 |
THRESHOLD,
|
| 99 |
drop_columns=drop_cols,
|
| 100 |
required_features=FEATURE_ORDER or None,
|
| 101 |
)
|
|
|
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def score_csv(upload):
|
|
|
|
| 105 |
"""Score a CSV uploaded by the user."""
|
| 106 |
_ensure_model()
|
| 107 |
if upload is None:
|
| 108 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
| 109 |
df = pd.read_csv(upload.name)
|
|
|
|
|
|
|
| 110 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 111 |
-
|
| 112 |
df,
|
| 113 |
PIPELINE,
|
| 114 |
THRESHOLD,
|
| 115 |
drop_columns=drop_cols,
|
| 116 |
required_features=FEATURE_ORDER or None,
|
| 117 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
def predict_from_form(*values):
|
| 121 |
"""Score a single row coming from the form tab."""
|
| 122 |
_ensure_model()
|
| 123 |
-
if not
|
| 124 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
| 125 |
-
payload = {feature: value for feature, value in zip(
|
| 126 |
df = pd.DataFrame([payload])
|
|
|
|
|
|
|
| 127 |
scored = run_inference(
|
| 128 |
df,
|
| 129 |
PIPELINE,
|
| 130 |
THRESHOLD,
|
| 131 |
required_features=FEATURE_ORDER or None,
|
| 132 |
)
|
|
|
|
| 133 |
row = scored.iloc[0]
|
| 134 |
label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
|
| 135 |
return {
|
|
@@ -157,11 +649,37 @@ except FileNotFoundError as exc:
|
|
| 157 |
logger.warning("Artéfact manquant: {}", exc)
|
| 158 |
|
| 159 |
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
| 162 |
-
gr.Markdown("#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
gr.Markdown(
|
| 164 |
-
"Le modĂšle
|
| 165 |
)
|
| 166 |
|
| 167 |
if PIPELINE is None:
|
|
@@ -172,26 +690,22 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 172 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
| 173 |
|
| 174 |
with gr.Tab("Formulaire unitaire"):
|
| 175 |
-
if not
|
| 176 |
gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
|
| 177 |
else:
|
| 178 |
form_inputs: list[gr.components.Component] = [] # type: ignore
|
| 179 |
-
for feature in
|
| 180 |
-
form_inputs.append(
|
| 181 |
-
gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
|
| 182 |
-
)
|
| 183 |
form_output = gr.JSON(label="Résultat")
|
| 184 |
gr.Button("Prédire").click(
|
| 185 |
fn=predict_from_form,
|
| 186 |
inputs=form_inputs,
|
| 187 |
outputs=form_output,
|
| 188 |
)
|
| 189 |
-
|
| 190 |
-
with gr.Tab("Tableau interactif"):
|
| 191 |
table_input = gr.Dataframe(
|
| 192 |
-
headers=
|
| 193 |
row_count=(1, "dynamic"),
|
| 194 |
-
col_count=(len(
|
| 195 |
type="pandas",
|
| 196 |
)
|
| 197 |
table_output = gr.Dataframe(label="Prédictions", type="pandas")
|
|
@@ -201,7 +715,8 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 201 |
outputs=table_output,
|
| 202 |
)
|
| 203 |
|
| 204 |
-
with gr.Tab("Fichier CSV"):
|
|
|
|
| 205 |
file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
|
| 206 |
file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
|
| 207 |
gr.Button("Scorer le fichier").click(
|
|
@@ -210,6 +725,21 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 210 |
outputs=file_output,
|
| 211 |
)
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
if __name__ == "__main__":
|
| 215 |
demo.launch()
|
|
|
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
+
import numpy as np
|
| 9 |
import pandas as pd
|
| 10 |
from loguru import logger
|
| 11 |
+
from sqlalchemy import create_engine
|
| 12 |
+
from sqlalchemy.engine import Engine
|
| 13 |
|
| 14 |
from projet_05.branding import apply_brand_theme
|
| 15 |
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
|
| 16 |
+
from projet_05.settings import load_settings
|
| 17 |
+
from projet_05 import dataset as ds
|
| 18 |
|
| 19 |
MODEL_PATH = Path("models/best_model.joblib")
|
| 20 |
METADATA_PATH = Path("models/best_model_meta.json")
|
| 21 |
SCHEMA_PATH = Path("data/processed/schema.json")
|
| 22 |
+
DERIVED_FEATURES = {
|
| 23 |
+
"augmentation_par_revenu",
|
| 24 |
+
"annee_sur_poste_par_experience",
|
| 25 |
+
"nb_formation_par_experience",
|
| 26 |
+
"score_moyen_satisfaction",
|
| 27 |
+
"dern_promo_par_experience",
|
| 28 |
+
"evolution_note",
|
| 29 |
+
}
|
| 30 |
+
SATISFACTION_COLUMNS = [
|
| 31 |
+
"satisfaction_employee_environnement",
|
| 32 |
+
"satisfaction_employee_nature_travail",
|
| 33 |
+
"satisfaction_employee_equipe",
|
| 34 |
+
"satisfaction_employee_equilibre_pro_perso",
|
| 35 |
+
]
|
| 36 |
+
NUMERIC_CODE_COLUMNS = ["niveau_hierarchique_poste", "niveau_education"]
|
| 37 |
+
NUMERIC_FEATURES: set[str] = set()
|
| 38 |
+
CATEGORICAL_FEATURES: set[str] = set()
|
| 39 |
+
|
| 40 |
+
# Configuration manuelle des champs d'entrée (label + placeholder).
|
| 41 |
+
FIELD_UI_CONFIG = [
|
| 42 |
+
{"name": "age", "label": "Ăge", "placeholder": "Ăge en annĂ©es (ex : 35)"},
|
| 43 |
+
{"name": "revenu_mensuel", "label": "Revenu mensuel (âŹ)", "placeholder": "Montant mensuel en euros (ex : 4500)"},
|
| 44 |
+
{"name": "annees_dans_l_entreprise", "label": "Années dans l'entreprise", "placeholder": "Ancienneté totale (ex : 4.5)"},
|
| 45 |
+
{"name": "annees_dans_le_poste_actuel", "label": "Années sur le poste actuel", "placeholder": "Durée dans le poste (ex : 2)"},
|
| 46 |
+
{
|
| 47 |
+
"name": "annees_depuis_la_derniere_promotion",
|
| 48 |
+
"label": "Années depuis la derniÚre promotion",
|
| 49 |
+
"placeholder": "Durée depuis la derniÚre promotion (ex : 1)",
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"name": "distance_domicile_travail",
|
| 53 |
+
"label": "Distance domicile-travail (km)",
|
| 54 |
+
"placeholder": "Distance en kilomĂštres (ex : 12)",
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "nombre_participation_pee",
|
| 58 |
+
"label": "Nombre de participations PEE",
|
| 59 |
+
"placeholder": "Nombre de participations (entier)",
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"name": "note_evaluation_actuelle",
|
| 63 |
+
"label": "Note d'évaluation actuelle",
|
| 64 |
+
"placeholder": "Score actuel (1 Ă 5)",
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "note_evaluation_precedente",
|
| 68 |
+
"label": "Note d'évaluation précédente",
|
| 69 |
+
"placeholder": "Score précédent (1 à 5)",
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "annees_depuis_le_changement_deposte",
|
| 73 |
+
"label": "Années depuis le dernier changement de poste",
|
| 74 |
+
"placeholder": "Temps écoulé (ex : 0 si jamais)",
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "annee_experience_totale",
|
| 78 |
+
"label": "Années d'expérience totale",
|
| 79 |
+
"placeholder": "Expérience cumulative (ex : 8)",
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "nb_formations_suivies",
|
| 83 |
+
"label": "Nombre de formations suivies",
|
| 84 |
+
"placeholder": "Total des formations (entier)",
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "satisfaction_employee_environnement",
|
| 88 |
+
"label": "Satisfaction environnement",
|
| 89 |
+
"placeholder": "Note de 1 (faible) Ă 5 (forte)",
|
| 90 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"name": "satisfaction_employee_nature_travail",
|
| 94 |
+
"label": "Satisfaction nature du travail",
|
| 95 |
+
"placeholder": "Note de 1 Ă 5",
|
| 96 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"name": "satisfaction_employee_equipe",
|
| 100 |
+
"label": "Satisfaction équipe",
|
| 101 |
+
"placeholder": "Note de 1 Ă 5",
|
| 102 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"name": "satisfaction_employee_equilibre_pro_perso",
|
| 106 |
+
"label": "Satisfaction équilibre pro/perso",
|
| 107 |
+
"placeholder": "Note de 1 Ă 5",
|
| 108 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"name": "genre",
|
| 112 |
+
"label": "Genre",
|
| 113 |
+
"component": "dropdown",
|
| 114 |
+
"choices": ["Femme", "Homme"],
|
| 115 |
+
"info": "Sélectionnez le genre",
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"name": "departement",
|
| 119 |
+
"label": "Département",
|
| 120 |
+
"component": "dropdown",
|
| 121 |
+
"choices": ["Commercial", "Consulting", "Ressources Humaines"],
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"name": "frequence_deplacement",
|
| 125 |
+
"label": "Fréquence des déplacements",
|
| 126 |
+
"component": "dropdown",
|
| 127 |
+
"choices": ["Aucun", "Occasionnel", "Frequent"],
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"name": "statut_marital",
|
| 131 |
+
"label": "Statut marital",
|
| 132 |
+
"component": "dropdown",
|
| 133 |
+
"choices": ["Célibataire", "Marié(e)", "Divorcé(e)"],
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"name": "poste",
|
| 137 |
+
"label": "Poste occupé",
|
| 138 |
+
"component": "dropdown",
|
| 139 |
+
"choices": [
|
| 140 |
+
"Cadre Commercial",
|
| 141 |
+
"Assistant de Direction",
|
| 142 |
+
"Consultant",
|
| 143 |
+
"Tech Lead",
|
| 144 |
+
"Manager",
|
| 145 |
+
"Senior Manager",
|
| 146 |
+
"Représentant Commercial",
|
| 147 |
+
"Directeur Technique",
|
| 148 |
+
"Ressources Humaines",
|
| 149 |
+
],
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"name": "niveau_hierarchique_poste",
|
| 153 |
+
"label": "Niveau hiérarchique",
|
| 154 |
+
"component": "dropdown",
|
| 155 |
+
"choices": [
|
| 156 |
+
"1, junior",
|
| 157 |
+
"2",
|
| 158 |
+
"3",
|
| 159 |
+
"4",
|
| 160 |
+
"5, senior",
|
| 161 |
+
],
|
| 162 |
+
"info": "Valeur numérique issue du SIRH (1 à 5)",
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"name": "niveau_education",
|
| 166 |
+
"label": "Niveau d'études",
|
| 167 |
+
"component": "dropdown",
|
| 168 |
+
"choices": [
|
| 169 |
+
"1, licence",
|
| 170 |
+
"2",
|
| 171 |
+
"3",
|
| 172 |
+
"4",
|
| 173 |
+
"5, master",
|
| 174 |
+
],
|
| 175 |
+
"info": "Indice numérique (1 à 5) figurant dans les exports bruts",
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "domaine_etude",
|
| 179 |
+
"label": "Domaine d'étude",
|
| 180 |
+
"component": "dropdown",
|
| 181 |
+
"choices": ["Entrepreunariat", "Infra & Cloud", "Marketing", "Ressources Humaines", "Transformation Digitale"],
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "heure_supplementaires",
|
| 185 |
+
"label": "Heures supplémentaires",
|
| 186 |
+
"component": "dropdown",
|
| 187 |
+
"choices": ["Oui", "Non"],
|
| 188 |
+
},
|
| 189 |
+
]
|
| 190 |
+
FIELD_UI_LOOKUP = {cfg["name"]: cfg for cfg in FIELD_UI_CONFIG}
|
| 191 |
+
try:
|
| 192 |
+
SETTINGS = load_settings()
|
| 193 |
+
except Exception: # pragma: no cover - remains optional when config absent
|
| 194 |
+
SETTINGS = None
|
| 195 |
+
CACHED_ENGINE: Engine | None = None
|
| 196 |
+
CATEGORICAL_NORMALIZERS: dict[str, dict[str, str]] = {
|
| 197 |
+
"genre": {
|
| 198 |
+
"f": "F",
|
| 199 |
+
"femme": "F",
|
| 200 |
+
"m": "M",
|
| 201 |
+
"homme": "M",
|
| 202 |
+
},
|
| 203 |
+
"statut_marital": {
|
| 204 |
+
"célibataire": "Célibataire",
|
| 205 |
+
"celibataire": "Célibataire",
|
| 206 |
+
"marié(e)": "Marié(e)",
|
| 207 |
+
"marie(e)": "Marié(e)",
|
| 208 |
+
"marie": "Marié(e)",
|
| 209 |
+
"marié": "Marié(e)",
|
| 210 |
+
"divorcé(e)": "Divorcé(e)",
|
| 211 |
+
"divorce(e)": "Divorcé(e)",
|
| 212 |
+
},
|
| 213 |
+
"departement": {
|
| 214 |
+
"commercial": "Commercial",
|
| 215 |
+
"consulting": "Consulting",
|
| 216 |
+
"ressources humaines": "Ressources Humaines",
|
| 217 |
+
},
|
| 218 |
+
"poste": {
|
| 219 |
+
"cadre commercial": "Cadre Commercial",
|
| 220 |
+
"assistant de direction": "Assistant de Direction",
|
| 221 |
+
"consultant": "Consultant",
|
| 222 |
+
"tech lead": "Tech Lead",
|
| 223 |
+
"manager": "Manager",
|
| 224 |
+
"senior manager": "Senior Manager",
|
| 225 |
+
"représentant commercial": "Représentant Commercial",
|
| 226 |
+
"representant commercial": "Représentant Commercial",
|
| 227 |
+
"directeur technique": "Directeur Technique",
|
| 228 |
+
"ressources humaines": "Ressources Humaines",
|
| 229 |
+
},
|
| 230 |
+
"frequence_deplacement": {
|
| 231 |
+
"aucun": "Aucun",
|
| 232 |
+
"aucune": "Aucun",
|
| 233 |
+
"occasionnel": "Occasionnel",
|
| 234 |
+
"occasionnelle": "Occasionnel",
|
| 235 |
+
"frequent": "Frequent",
|
| 236 |
+
"fréquent": "Frequent",
|
| 237 |
+
},
|
| 238 |
+
"domaine_etude": {
|
| 239 |
+
"entrepreunariat": "Entrepreunariat",
|
| 240 |
+
"infra & cloud": "Infra & Cloud",
|
| 241 |
+
"infra et cloud": "Infra & Cloud",
|
| 242 |
+
"marketing": "Marketing",
|
| 243 |
+
"ressources humaines": "Ressources Humaines",
|
| 244 |
+
"transformation digitale": "Transformation Digitale",
|
| 245 |
+
},
|
| 246 |
+
"heure_supplementaires": {
|
| 247 |
+
"oui": "Oui",
|
| 248 |
+
"o": "Oui",
|
| 249 |
+
"y": "Oui",
|
| 250 |
+
"non": "Non",
|
| 251 |
+
"n": "Non",
|
| 252 |
+
},
|
| 253 |
+
"niveau_hierarchique_poste": {
|
| 254 |
+
"junior": "Junior",
|
| 255 |
+
"confirmé": "Confirmé",
|
| 256 |
+
"confirme": "Confirmé",
|
| 257 |
+
"direction": "Direction",
|
| 258 |
+
"senior": "Senior",
|
| 259 |
+
},
|
| 260 |
+
"niveau_education": {
|
| 261 |
+
"licence": "Licence",
|
| 262 |
+
"master": "Master",
|
| 263 |
+
"doctorat": "Doctorat",
|
| 264 |
+
"bts": "BTS",
|
| 265 |
+
},
|
| 266 |
+
}
|
| 267 |
|
| 268 |
|
| 269 |
def _load_schema(path: Path) -> dict[str, Any]:
|
|
|
|
| 304 |
return []
|
| 305 |
|
| 306 |
|
| 307 |
+
def _ensure_settings():
|
| 308 |
+
"""Ensure configuration settings are available for data fusion."""
|
| 309 |
+
|
| 310 |
+
if SETTINGS is None:
|
| 311 |
+
raise gr.Error(
|
| 312 |
+
"Configuration introuvable. Placez `projet_05/settings.yml` dans le dépÎt ou renseignez PROJET05_SETTINGS."
|
| 313 |
+
)
|
| 314 |
+
return SETTINGS
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def _get_db_engine(settings: Settings) -> Engine: # pyright: ignore[reportUndefinedVariable]
|
| 318 |
+
global CACHED_ENGINE
|
| 319 |
+
if CACHED_ENGINE is not None:
|
| 320 |
+
return CACHED_ENGINE
|
| 321 |
+
if not settings.db_url:
|
| 322 |
+
raise RuntimeError(
|
| 323 |
+
"Aucune URL de base de données n'a été fournie. Configurez `database.url` dans settings.yml."
|
| 324 |
+
)
|
| 325 |
+
CACHED_ENGINE = create_engine(settings.db_url, future=True)
|
| 326 |
+
return CACHED_ENGINE
|
| 327 |
+
|
| 328 |
+
|
| 329 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 330 |
"""Normalize any user input into a validated DataFrame.
|
| 331 |
|
|
|
|
| 351 |
return df
|
| 352 |
|
| 353 |
|
| 354 |
+
def _read_uploaded_csv(upload, label: str) -> pd.DataFrame:
|
| 355 |
+
"""Load an uploaded CSV file or raise a user-friendly error."""
|
| 356 |
+
|
| 357 |
+
if upload is None:
|
| 358 |
+
raise gr.Error(f"Veuillez déposer le fichier {label}.")
|
| 359 |
+
try:
|
| 360 |
+
return pd.read_csv(upload.name)
|
| 361 |
+
except Exception as exc: # pragma: no cover - delegated to pandas
|
| 362 |
+
raise gr.Error(f"Impossible de lire le fichier {label}: {exc}") from exc
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def _resolve_field_ui(feature: str) -> tuple[str, str, str | None, str, dict[str, Any]]:
|
| 366 |
+
"""Return UI metadata (label, placeholder, info, component type, config)."""
|
| 367 |
+
|
| 368 |
+
config = FIELD_UI_LOOKUP.get(feature, {})
|
| 369 |
+
label = config.get("label") or feature.replace("_", " ").capitalize()
|
| 370 |
+
placeholder = config.get("placeholder") or f"Saisir {label.lower()}"
|
| 371 |
+
info = config.get("info")
|
| 372 |
+
component = config.get("component", "textbox")
|
| 373 |
+
return label, placeholder, info, component, config
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def _build_input_component(feature: str) -> gr.components.Component: # type: ignore
|
| 377 |
+
"""Instantiate the appropriate Gradio component for a feature."""
|
| 378 |
+
|
| 379 |
+
label, placeholder, info, component, config = _resolve_field_ui(feature)
|
| 380 |
+
if component == "dropdown":
|
| 381 |
+
choices = config.get("choices") or []
|
| 382 |
+
default = config.get("default")
|
| 383 |
+
allow_custom = config.get("allow_custom_value", False)
|
| 384 |
+
return gr.Dropdown(
|
| 385 |
+
label=label,
|
| 386 |
+
choices=choices,
|
| 387 |
+
value=default,
|
| 388 |
+
info=info,
|
| 389 |
+
allow_custom_value=allow_custom,
|
| 390 |
+
)
|
| 391 |
+
return gr.Textbox(label=label, placeholder=placeholder, info=info)
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def _normalize_categorical_values(df: pd.DataFrame) -> pd.DataFrame:
|
| 395 |
+
"""Normalize friendly categorical values into the codes used by the model."""
|
| 396 |
+
|
| 397 |
+
normalized = df.copy()
|
| 398 |
+
|
| 399 |
+
def _normalize_value(value, mapping: dict[str, str]):
|
| 400 |
+
if pd.isna(value):
|
| 401 |
+
return value
|
| 402 |
+
if isinstance(value, str):
|
| 403 |
+
cleaned = value.strip()
|
| 404 |
+
lowered = cleaned.lower()
|
| 405 |
+
return mapping.get(lowered, cleaned)
|
| 406 |
+
return mapping.get(value, value)
|
| 407 |
+
|
| 408 |
+
for column, mapping in CATEGORICAL_NORMALIZERS.items():
|
| 409 |
+
if column not in normalized.columns:
|
| 410 |
+
continue
|
| 411 |
+
normalized[column] = normalized[column].apply(lambda v, m=mapping: _normalize_value(v, m))
|
| 412 |
+
for column in NUMERIC_CODE_COLUMNS:
|
| 413 |
+
if column in normalized.columns:
|
| 414 |
+
extracted = (
|
| 415 |
+
normalized[column]
|
| 416 |
+
.astype(str)
|
| 417 |
+
.str.extract(r"(-?\d+(?:[.,]\d+)?)")[0]
|
| 418 |
+
.str.replace(",", ".", regex=False)
|
| 419 |
+
)
|
| 420 |
+
normalized[column] = pd.to_numeric(extracted, errors="coerce")
|
| 421 |
+
numeric_targets = [col for col in NUMERIC_FEATURES.union(DERIVED_FEATURES).union(NUMERIC_CODE_COLUMNS) if col in normalized.columns]
|
| 422 |
+
for column in numeric_targets:
|
| 423 |
+
normalized[column] = pd.to_numeric(normalized[column], errors="coerce")
|
| 424 |
+
return normalized
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def _apply_derived_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 428 |
+
"""Recompute engineered ratios so end-users do not have to provide them."""
|
| 429 |
+
|
| 430 |
+
enriched = _normalize_categorical_values(df)
|
| 431 |
+
|
| 432 |
+
def _safe_ratio(numerator: str, denominator: str, output: str) -> None:
|
| 433 |
+
if numerator not in enriched.columns or denominator not in enriched.columns:
|
| 434 |
+
return
|
| 435 |
+
numerator_series = pd.to_numeric(enriched[numerator], errors="coerce")
|
| 436 |
+
denominator_series = pd.to_numeric(enriched[denominator], errors="coerce").replace(0, pd.NA)
|
| 437 |
+
enriched[output] = numerator_series / denominator_series
|
| 438 |
+
|
| 439 |
+
prev_raise_col = "augementation_salaire_precedente"
|
| 440 |
+
if prev_raise_col in enriched:
|
| 441 |
+
normalized = (
|
| 442 |
+
enriched[prev_raise_col]
|
| 443 |
+
.astype(str)
|
| 444 |
+
.str.replace("%", "", regex=False)
|
| 445 |
+
.str.replace(",", ".", regex=False)
|
| 446 |
+
.str.strip()
|
| 447 |
+
)
|
| 448 |
+
enriched[prev_raise_col] = pd.to_numeric(normalized, errors="coerce") / 100
|
| 449 |
+
|
| 450 |
+
_safe_ratio("augementation_salaire_precedente", "revenu_mensuel", "augmentation_par_revenu")
|
| 451 |
+
_safe_ratio("annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
|
| 452 |
+
_safe_ratio("nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
|
| 453 |
+
_safe_ratio("annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience")
|
| 454 |
+
|
| 455 |
+
existing_sats = [col for col in SATISFACTION_COLUMNS if col in enriched.columns]
|
| 456 |
+
if existing_sats:
|
| 457 |
+
enriched["score_moyen_satisfaction"] = pd.DataFrame(
|
| 458 |
+
{col: pd.to_numeric(enriched[col], errors="coerce") for col in existing_sats}
|
| 459 |
+
).mean(axis=1)
|
| 460 |
+
|
| 461 |
+
if {"note_evaluation_actuelle", "note_evaluation_precedente"}.issubset(enriched.columns):
|
| 462 |
+
enriched["evolution_note"] = pd.to_numeric(
|
| 463 |
+
enriched["note_evaluation_actuelle"], errors="coerce"
|
| 464 |
+
) - pd.to_numeric(enriched["note_evaluation_precedente"], errors="coerce")
|
| 465 |
+
|
| 466 |
+
return enriched.replace({pd.NA: np.nan})
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
def _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload) -> pd.DataFrame:
|
| 470 |
+
"""Merge raw SIRH / evaluation / sondage CSVs uploaded by the user."""
|
| 471 |
+
|
| 472 |
+
settings = _ensure_settings()
|
| 473 |
+
sirh = ds.clean_text_values(_read_uploaded_csv(sirh_upload, "SIRH")).pipe(
|
| 474 |
+
ds._harmonize_id_column, settings.col_id, digits_only=True
|
| 475 |
+
)
|
| 476 |
+
evaluation = (
|
| 477 |
+
ds.clean_text_values(_read_uploaded_csv(evaluation_upload, "évaluation"))
|
| 478 |
+
.pipe(ds._rename_column, "eval_number", settings.col_id)
|
| 479 |
+
.pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)
|
| 480 |
+
)
|
| 481 |
+
sond = (
|
| 482 |
+
ds.clean_text_values(_read_uploaded_csv(sond_upload, "sondage"))
|
| 483 |
+
.pipe(ds._rename_column, "code_sondage", settings.col_id)
|
| 484 |
+
.pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)
|
| 485 |
+
)
|
| 486 |
+
|
| 487 |
+
for label, frame in {"SIRH": sirh, "évaluation": evaluation, "sondage": sond}.items():
|
| 488 |
+
if frame.empty:
|
| 489 |
+
raise gr.Error(f"Le fichier {label} est vide ou invalide.")
|
| 490 |
+
if settings.col_id not in frame.columns:
|
| 491 |
+
raise gr.Error(f"La colonne {settings.col_id} est absente du fichier {label}.")
|
| 492 |
+
|
| 493 |
+
merged = sirh.merge(evaluation, on=settings.col_id, how="inner").merge(sond, on=settings.col_id, how="inner")
|
| 494 |
+
if merged.empty:
|
| 495 |
+
raise gr.Error("Aucune ligne résultante aprÚs fusion des trois fichiers (jointure INNER vide).")
|
| 496 |
+
return merged
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
def _log_predictions(source: str, raw_inputs: pd.DataFrame, scored: pd.DataFrame) -> None:
|
| 500 |
+
"""Persist user interactions with the ML model into PostgreSQL."""
|
| 501 |
+
|
| 502 |
+
if SETTINGS is None or not SETTINGS.db_url:
|
| 503 |
+
return
|
| 504 |
+
settings = _ensure_settings()
|
| 505 |
+
try:
|
| 506 |
+
engine = _get_db_engine(settings)
|
| 507 |
+
except Exception as exc: # pragma: no cover - logging best effort
|
| 508 |
+
logger.error("Connexion impossible pour logger les interactions: {}", exc)
|
| 509 |
+
return
|
| 510 |
+
|
| 511 |
+
payload = raw_inputs.reindex(scored.index).fillna(value=pd.NA)
|
| 512 |
+
col_id = settings.col_id
|
| 513 |
+
records = []
|
| 514 |
+
for idx, row in scored.iterrows():
|
| 515 |
+
original = payload.loc[idx].to_dict() if idx in payload.index else {} # type: ignore
|
| 516 |
+
records.append(
|
| 517 |
+
{
|
| 518 |
+
"id_employee": row.get(col_id),
|
| 519 |
+
"probability": float(row.get("proba_depart", 0.0)),
|
| 520 |
+
"decision": int(row.get("prediction", 0)),
|
| 521 |
+
"threshold": THRESHOLD,
|
| 522 |
+
"source": source,
|
| 523 |
+
"payload": json.dumps(original, ensure_ascii=False, default=str),
|
| 524 |
+
}
|
| 525 |
+
)
|
| 526 |
+
|
| 527 |
+
if not records:
|
| 528 |
+
return
|
| 529 |
+
|
| 530 |
+
try:
|
| 531 |
+
pd.DataFrame(records).to_sql(
|
| 532 |
+
"prediction_logs",
|
| 533 |
+
engine,
|
| 534 |
+
schema=settings.db_schema,
|
| 535 |
+
if_exists="append",
|
| 536 |
+
index=False,
|
| 537 |
+
method="multi",
|
| 538 |
+
)
|
| 539 |
+
except Exception as exc: # pragma: no cover - logging best effort
|
| 540 |
+
logger.error("Impossible de journaliser les interactions: {}", exc)
|
| 541 |
+
|
| 542 |
+
|
| 543 |
def _ensure_model():
|
| 544 |
"""Ensure that a pipeline has been loaded before inference."""
|
| 545 |
if PIPELINE is None:
|
|
|
|
| 551 |
def score_table(table):
|
| 552 |
"""Score data entered via the interactive table."""
|
| 553 |
_ensure_model()
|
| 554 |
+
df = _convert_input(table, INPUT_FEATURES)
|
| 555 |
+
original = df.copy()
|
| 556 |
+
df = _apply_derived_features(df)
|
| 557 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 558 |
+
scored = run_inference(
|
| 559 |
df,
|
| 560 |
PIPELINE,
|
| 561 |
THRESHOLD,
|
| 562 |
drop_columns=drop_cols,
|
| 563 |
required_features=FEATURE_ORDER or None,
|
| 564 |
)
|
| 565 |
+
_log_predictions("interactive_table", original, scored)
|
| 566 |
+
return scored
|
| 567 |
|
| 568 |
|
| 569 |
def score_csv(upload):
|
| 570 |
+
|
| 571 |
"""Score a CSV uploaded by the user."""
|
| 572 |
_ensure_model()
|
| 573 |
if upload is None:
|
| 574 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
| 575 |
df = pd.read_csv(upload.name)
|
| 576 |
+
original = df.copy()
|
| 577 |
+
df = _apply_derived_features(df)
|
| 578 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 579 |
+
scored = run_inference(
|
| 580 |
df,
|
| 581 |
PIPELINE,
|
| 582 |
THRESHOLD,
|
| 583 |
drop_columns=drop_cols,
|
| 584 |
required_features=FEATURE_ORDER or None,
|
| 585 |
)
|
| 586 |
+
_log_predictions("csv_file", original, scored)
|
| 587 |
+
return scored
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
def score_raw_files(sirh_upload, evaluation_upload, sond_upload):
|
| 591 |
+
"""Score three raw CSVs (SIRH, évaluation, sondage) after merging them."""
|
| 592 |
+
|
| 593 |
+
_ensure_model()
|
| 594 |
+
merged = _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload)
|
| 595 |
+
original = merged.copy()
|
| 596 |
+
df = _apply_derived_features(merged)
|
| 597 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 598 |
+
scored = run_inference(
|
| 599 |
+
df,
|
| 600 |
+
PIPELINE,
|
| 601 |
+
THRESHOLD,
|
| 602 |
+
drop_columns=drop_cols,
|
| 603 |
+
required_features=FEATURE_ORDER or None,
|
| 604 |
+
)
|
| 605 |
+
_log_predictions("raw_files", original, scored)
|
| 606 |
+
return scored
|
| 607 |
|
| 608 |
|
| 609 |
def predict_from_form(*values):
|
| 610 |
"""Score a single row coming from the form tab."""
|
| 611 |
_ensure_model()
|
| 612 |
+
if not INPUT_FEATURES:
|
| 613 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
| 614 |
+
payload = {feature: value for feature, value in zip(INPUT_FEATURES, values)}
|
| 615 |
df = pd.DataFrame([payload])
|
| 616 |
+
original = df.copy()
|
| 617 |
+
df = _apply_derived_features(df)
|
| 618 |
scored = run_inference(
|
| 619 |
df,
|
| 620 |
PIPELINE,
|
| 621 |
THRESHOLD,
|
| 622 |
required_features=FEATURE_ORDER or None,
|
| 623 |
)
|
| 624 |
+
_log_predictions("form", original, scored)
|
| 625 |
row = scored.iloc[0]
|
| 626 |
label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
|
| 627 |
return {
|
|
|
|
| 649 |
logger.warning("Artéfact manquant: {}", exc)
|
| 650 |
|
| 651 |
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
|
| 652 |
+
INPUT_FEATURES = [feature for feature in FEATURE_ORDER if feature not in DERIVED_FEATURES]
|
| 653 |
+
if not INPUT_FEATURES:
|
| 654 |
+
INPUT_FEATURES = FEATURE_ORDER
|
| 655 |
+
numeric_from_schema = set(SCHEMA.get("numerical_features", []))
|
| 656 |
+
categorical_from_schema = set(SCHEMA.get("categorical_features", []))
|
| 657 |
+
if not numeric_from_schema:
|
| 658 |
+
numeric_from_schema = set((METADATA.get("features", {}).get("numerical") or []))
|
| 659 |
+
if not categorical_from_schema:
|
| 660 |
+
categorical_from_schema = set((METADATA.get("features", {}).get("categorical") or []))
|
| 661 |
+
NUMERIC_FEATURES = numeric_from_schema
|
| 662 |
+
CATEGORICAL_FEATURES = categorical_from_schema
|
| 663 |
|
| 664 |
with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
| 665 |
+
gr.Markdown("# OCR Projet 5 â PrĂ©diction de dĂ©part employĂ©")
|
| 666 |
+
gr.HTML(
|
| 667 |
+
"""
|
| 668 |
+
<div style="display:flex; gap:0.5rem; flex-wrap:wrap;">
|
| 669 |
+
<a href="https://github.com/stephmnt/OCR_Projet05/releases" target="_blank" rel="noreferrer">
|
| 670 |
+
<img src="https://img.shields.io/github/v/release/stephmnt/OCR_Projet05" alt="GitHub Release" />
|
| 671 |
+
</a>
|
| 672 |
+
<a href="https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml" target="_blank" rel="noreferrer">
|
| 673 |
+
<img src="https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml" alt="GitHub Actions Workflow Status" />
|
| 674 |
+
</a>
|
| 675 |
+
<a href="https://stephmnt.github.io/OCR_Projet05" target="_blank" rel="noreferrer">
|
| 676 |
+
<img src="https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff" alt="MkDocs" />
|
| 677 |
+
</a>
|
| 678 |
+
</div>
|
| 679 |
+
"""
|
| 680 |
+
)
|
| 681 |
gr.Markdown(
|
| 682 |
+
"Le modÚle fournit une probabilité de départ ainsi qu'une décision binaire."
|
| 683 |
)
|
| 684 |
|
| 685 |
if PIPELINE is None:
|
|
|
|
| 690 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
| 691 |
|
| 692 |
with gr.Tab("Formulaire unitaire"):
|
| 693 |
+
if not INPUT_FEATURES:
|
| 694 |
gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
|
| 695 |
else:
|
| 696 |
form_inputs: list[gr.components.Component] = [] # type: ignore
|
| 697 |
+
for feature in INPUT_FEATURES:
|
| 698 |
+
form_inputs.append(_build_input_component(feature))
|
|
|
|
|
|
|
| 699 |
form_output = gr.JSON(label="Résultat")
|
| 700 |
gr.Button("Prédire").click(
|
| 701 |
fn=predict_from_form,
|
| 702 |
inputs=form_inputs,
|
| 703 |
outputs=form_output,
|
| 704 |
)
|
|
|
|
|
|
|
| 705 |
table_input = gr.Dataframe(
|
| 706 |
+
headers=INPUT_FEATURES if INPUT_FEATURES else None,
|
| 707 |
row_count=(1, "dynamic"),
|
| 708 |
+
col_count=(len(INPUT_FEATURES), "dynamic") if INPUT_FEATURES else (5, "dynamic"),
|
| 709 |
type="pandas",
|
| 710 |
)
|
| 711 |
table_output = gr.Dataframe(label="Prédictions", type="pandas")
|
|
|
|
| 715 |
outputs=table_output,
|
| 716 |
)
|
| 717 |
|
| 718 |
+
with gr.Tab("Fichier CSV fusionné"):
|
| 719 |
+
gr.Markdown("Un exemple de fichier à importer est disponible dans le dépÎt github : [`references/sample_employees.csv`](https://github.com/stephmnt/OCR_Projet05/blob/main/references/sample_employees.csv)")
|
| 720 |
file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
|
| 721 |
file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
|
| 722 |
gr.Button("Scorer le fichier").click(
|
|
|
|
| 725 |
outputs=file_output,
|
| 726 |
)
|
| 727 |
|
| 728 |
+
with gr.Tab("Fichiers non-mergés"):
|
| 729 |
+
gr.Markdown(
|
| 730 |
+
"Téléversez directement les trois fichiers bruts (SIRH, évaluation, sondage). "
|
| 731 |
+
"L'application reproduira automatiquement la fusion puis le scoring."
|
| 732 |
+
)
|
| 733 |
+
sirh_input = gr.File(file_types=[".csv"], label="Fichier SIRH")
|
| 734 |
+
evaluation_input = gr.File(file_types=[".csv"], label="Fichier Ăvaluation")
|
| 735 |
+
sond_input = gr.File(file_types=[".csv"], label="Fichier Sondage")
|
| 736 |
+
raw_output = gr.Dataframe(label="Résultats fusion automatique", type="pandas")
|
| 737 |
+
gr.Button("Fusionner et scorer").click(
|
| 738 |
+
fn=score_raw_files,
|
| 739 |
+
inputs=[sirh_input, evaluation_input, sond_input],
|
| 740 |
+
outputs=raw_output,
|
| 741 |
+
)
|
| 742 |
+
|
| 743 |
|
| 744 |
if __name__ == "__main__":
|
| 745 |
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
name:
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
|
@@ -19,12 +19,16 @@ jobs:
|
|
| 19 |
- name: Setup Python
|
| 20 |
uses: actions/setup-python@v5
|
| 21 |
with:
|
| 22 |
-
python-version: "3.
|
| 23 |
|
| 24 |
- name: Install dependencies
|
| 25 |
run: |
|
| 26 |
python -m pip install --upgrade pip
|
| 27 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
- name: Deploy to Hugging Face Space
|
| 30 |
env:
|
|
@@ -33,7 +37,7 @@ jobs:
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
-
rsync -av --exclude '.git' --exclude '
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
|
|
|
| 1 |
+
name: Deploiement vers Hugging Face Spaces
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
|
|
|
| 19 |
- name: Setup Python
|
| 20 |
uses: actions/setup-python@v5
|
| 21 |
with:
|
| 22 |
+
python-version: "3.11"
|
| 23 |
|
| 24 |
- name: Install dependencies
|
| 25 |
run: |
|
| 26 |
python -m pip install --upgrade pip
|
| 27 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 28 |
+
pip install -e .
|
| 29 |
+
|
| 30 |
+
- name: Préparer les données et le modÚle
|
| 31 |
+
run: python main.py
|
| 32 |
|
| 33 |
- name: Deploy to Hugging Face Space
|
| 34 |
env:
|
|
|
|
| 37 |
git config --global user.email "actions@github.com"
|
| 38 |
git config --global user.name "GitHub Actions"
|
| 39 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 40 |
+
rsync -av --exclude '.git' --exclude 'docs' ./ hf_space/
|
| 41 |
cd hf_space
|
| 42 |
git add .
|
| 43 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/static.yml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploiement de la documentation
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: ["main"]
|
| 6 |
+
workflow_dispatch:
|
| 7 |
+
|
| 8 |
+
permissions:
|
| 9 |
+
contents: read
|
| 10 |
+
pages: write
|
| 11 |
+
id-token: write
|
| 12 |
+
|
| 13 |
+
concurrency:
|
| 14 |
+
group: "pages"
|
| 15 |
+
cancel-in-progress: false
|
| 16 |
+
|
| 17 |
+
jobs:
|
| 18 |
+
deploy:
|
| 19 |
+
environment:
|
| 20 |
+
name: github-pages
|
| 21 |
+
url: ${{ steps.deployment.outputs.page_url }}
|
| 22 |
+
runs-on: ubuntu-latest
|
| 23 |
+
steps:
|
| 24 |
+
- name: Checkout
|
| 25 |
+
uses: actions/checkout@v4
|
| 26 |
+
|
| 27 |
+
- name: Setup Pages
|
| 28 |
+
uses: actions/configure-pages@v5
|
| 29 |
+
|
| 30 |
+
- name: Upload artifact
|
| 31 |
+
uses: actions/upload-pages-artifact@v3
|
| 32 |
+
with:
|
| 33 |
+
path: 'docs/site'
|
| 34 |
+
|
| 35 |
+
- name: Deploy to GitHub Pages
|
| 36 |
+
id: deployment
|
| 37 |
+
uses: actions/deploy-pages@v4
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -1,19 +1,18 @@
|
|
| 1 |
-
#
|
| 2 |
-
/data/
|
| 3 |
-
|
| 4 |
-
# Mac OS-specific storage files
|
| 5 |
.DS_Store
|
| 6 |
*.code-workspace
|
| 7 |
*.pdf
|
| 8 |
/output/
|
| 9 |
questions.md
|
| 10 |
-
|
| 11 |
-
|
|
|
|
| 12 |
|
| 13 |
# vim
|
| 14 |
*.swp
|
| 15 |
*.swo
|
| 16 |
|
|
|
|
| 17 |
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 18 |
|
| 19 |
# Byte-compiled / optimized / DLL files
|
|
@@ -86,9 +85,6 @@ instance/
|
|
| 86 |
# Scrapy stuff:
|
| 87 |
.scrapy
|
| 88 |
|
| 89 |
-
# MkDocs documentation
|
| 90 |
-
docs/site/
|
| 91 |
-
|
| 92 |
# PyBuilder
|
| 93 |
.pybuilder/
|
| 94 |
target/
|
|
@@ -166,9 +162,6 @@ venv.bak/
|
|
| 166 |
# Rope project settings
|
| 167 |
.ropeproject
|
| 168 |
|
| 169 |
-
# mkdocs documentation
|
| 170 |
-
/site
|
| 171 |
-
|
| 172 |
# mypy
|
| 173 |
.mypy_cache/
|
| 174 |
.dmypy.json
|
|
|
|
| 1 |
+
# Spécifique à ce projet
|
|
|
|
|
|
|
|
|
|
| 2 |
.DS_Store
|
| 3 |
*.code-workspace
|
| 4 |
*.pdf
|
| 5 |
/output/
|
| 6 |
questions.md
|
| 7 |
+
/reports/
|
| 8 |
+
/data/
|
| 9 |
+
runtime.txt
|
| 10 |
|
| 11 |
# vim
|
| 12 |
*.swp
|
| 13 |
*.swo
|
| 14 |
|
| 15 |
+
|
| 16 |
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 17 |
|
| 18 |
# Byte-compiled / optimized / DLL files
|
|
|
|
| 85 |
# Scrapy stuff:
|
| 86 |
.scrapy
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
# PyBuilder
|
| 89 |
.pybuilder/
|
| 90 |
target/
|
|
|
|
| 162 |
# Rope project settings
|
| 163 |
.ropeproject
|
| 164 |
|
|
|
|
|
|
|
|
|
|
| 165 |
# mypy
|
| 166 |
.mypy_cache/
|
| 167 |
.dmypy.json
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -8,15 +8,17 @@ sdk_version: 5.49.1
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: true
|
| 10 |
short_description: Projet 05 formation Openclassrooms
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
-
# projet_05
|
| 14 |
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
| 18 |
|
| 19 |
-
|
|
|
|
| 20 |
|
| 21 |
## Organisation du projet
|
| 22 |
|
|
@@ -305,10 +307,6 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 305 |
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 306 |
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 307 |
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
| 308 |
-
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 309 |
-
[license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
|
| 310 |
-
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 311 |
-
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 312 |
[product-screenshot]: images/screenshot.png
|
| 313 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 314 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
@@ -328,9 +326,15 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 328 |
[Bootstrap-url]: https://getbootstrap.com
|
| 329 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 330 |
[JQuery-url]: https://jquery.com
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
<!-- TODO: -->
|
| 332 |
-
[
|
| 333 |
-
[
|
| 334 |
-
[
|
|
|
|
| 335 |
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 336 |
[](#)
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: true
|
| 10 |
short_description: Projet 05 formation Openclassrooms
|
| 11 |
+
python_version: 3.11
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# projet_05 : Déployez un modÚle de Machine Learning
|
| 15 |
|
| 16 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 17 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 18 |
</a>
|
| 19 |
|
| 20 |
+
[![mkdocs-shield]][mkdocs-url]
|
| 21 |
+
|
| 22 |
|
| 23 |
## Organisation du projet
|
| 24 |
|
|
|
|
| 307 |
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 308 |
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 309 |
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
[product-screenshot]: images/screenshot.png
|
| 311 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 312 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
|
|
| 326 |
[Bootstrap-url]: https://getbootstrap.com
|
| 327 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 328 |
[JQuery-url]: https://jquery.com
|
| 329 |
+
<!-- OK -->
|
| 330 |
+
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 331 |
+
[license-url]: https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE
|
| 332 |
+
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 333 |
+
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 334 |
<!-- TODO: -->
|
| 335 |
+
[postgres-shield]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
|
| 336 |
+
[python-shield]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
|
| 337 |
+
[mkdocs-shield]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
|
| 338 |
+
[mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
|
| 339 |
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 340 |
[](#)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -17,12 +17,30 @@ SCHEMA_PATH = Path("data/processed/schema.json")
|
|
| 17 |
|
| 18 |
|
| 19 |
def _load_schema(path: Path) -> dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
if not path.exists():
|
| 21 |
return {}
|
| 22 |
return json.loads(path.read_text(encoding="utf-8"))
|
| 23 |
|
| 24 |
|
| 25 |
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
if schema:
|
| 27 |
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
|
| 28 |
if candidates:
|
|
@@ -37,6 +55,18 @@ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
|
| 37 |
|
| 38 |
|
| 39 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
if isinstance(payload, pd.DataFrame):
|
| 41 |
df = payload.copy()
|
| 42 |
elif payload is None:
|
|
@@ -50,6 +80,7 @@ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
|
| 50 |
|
| 51 |
|
| 52 |
def _ensure_model():
|
|
|
|
| 53 |
if PIPELINE is None:
|
| 54 |
raise gr.Error(
|
| 55 |
"Aucun modÚle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
|
|
@@ -57,6 +88,7 @@ def _ensure_model():
|
|
| 57 |
|
| 58 |
|
| 59 |
def score_table(table):
|
|
|
|
| 60 |
_ensure_model()
|
| 61 |
df = _convert_input(table, FEATURE_ORDER)
|
| 62 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
|
@@ -70,6 +102,7 @@ def score_table(table):
|
|
| 70 |
|
| 71 |
|
| 72 |
def score_csv(upload):
|
|
|
|
| 73 |
_ensure_model()
|
| 74 |
if upload is None:
|
| 75 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
|
@@ -85,6 +118,7 @@ def score_csv(upload):
|
|
| 85 |
|
| 86 |
|
| 87 |
def predict_from_form(*values):
|
|
|
|
| 88 |
_ensure_model()
|
| 89 |
if not FEATURE_ORDER:
|
| 90 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
|
@@ -132,7 +166,7 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 132 |
|
| 133 |
if PIPELINE is None:
|
| 134 |
gr.Markdown(
|
| 135 |
-
"
|
| 136 |
)
|
| 137 |
else:
|
| 138 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def _load_schema(path: Path) -> dict[str, Any]:
|
| 20 |
+
"""Load the schema definition stored as JSON.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
path: Path to the schema.json file.
|
| 24 |
+
|
| 25 |
+
Returns:
|
| 26 |
+
A dictionary describing the schema or an empty dict if the file is missing.
|
| 27 |
+
"""
|
| 28 |
if not path.exists():
|
| 29 |
return {}
|
| 30 |
return json.loads(path.read_text(encoding="utf-8"))
|
| 31 |
|
| 32 |
|
| 33 |
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
| 34 |
+
"""Infer the ordered list of features expected by the model.
|
| 35 |
+
|
| 36 |
+
Args:
|
| 37 |
+
metadata: Metadata produced during training.
|
| 38 |
+
schema: Schema derived from `features.py`.
|
| 39 |
+
pipeline: Loaded sklearn pipeline (optional).
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
List of feature names in the order expected by the model.
|
| 43 |
+
"""
|
| 44 |
if schema:
|
| 45 |
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
|
| 46 |
if candidates:
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 58 |
+
"""Normalize any user input into a validated DataFrame.
|
| 59 |
+
|
| 60 |
+
Args:
|
| 61 |
+
payload: Raw table coming from Gradio (DataFrame, list, etc.).
|
| 62 |
+
headers: Expected column names.
|
| 63 |
+
|
| 64 |
+
Returns:
|
| 65 |
+
A sanitized DataFrame.
|
| 66 |
+
|
| 67 |
+
Raises:
|
| 68 |
+
gr.Error: If no valid row is provided.
|
| 69 |
+
"""
|
| 70 |
if isinstance(payload, pd.DataFrame):
|
| 71 |
df = payload.copy()
|
| 72 |
elif payload is None:
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
def _ensure_model():
|
| 83 |
+
"""Ensure that a pipeline has been loaded before inference."""
|
| 84 |
if PIPELINE is None:
|
| 85 |
raise gr.Error(
|
| 86 |
"Aucun modÚle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
|
|
|
|
| 88 |
|
| 89 |
|
| 90 |
def score_table(table):
|
| 91 |
+
"""Score data entered via the interactive table."""
|
| 92 |
_ensure_model()
|
| 93 |
df = _convert_input(table, FEATURE_ORDER)
|
| 94 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def score_csv(upload):
|
| 105 |
+
"""Score a CSV uploaded by the user."""
|
| 106 |
_ensure_model()
|
| 107 |
if upload is None:
|
| 108 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
def predict_from_form(*values):
|
| 121 |
+
"""Score a single row coming from the form tab."""
|
| 122 |
_ensure_model()
|
| 123 |
if not FEATURE_ORDER:
|
| 124 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
|
|
|
| 166 |
|
| 167 |
if PIPELINE is None:
|
| 168 |
gr.Markdown(
|
| 169 |
+
"**Aucun modĂšle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
|
| 170 |
)
|
| 171 |
else:
|
| 172 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -4,6 +4,11 @@
|
|
| 4 |
# Mac OS-specific storage files
|
| 5 |
.DS_Store
|
| 6 |
*.code-workspace
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# vim
|
| 9 |
*.swp
|
|
|
|
| 4 |
# Mac OS-specific storage files
|
| 5 |
.DS_Store
|
| 6 |
*.code-workspace
|
| 7 |
+
*.pdf
|
| 8 |
+
/output/
|
| 9 |
+
questions.md
|
| 10 |
+
*.pdf
|
| 11 |
+
|
| 12 |
|
| 13 |
# vim
|
| 14 |
*.swp
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
# projet_05
|
| 2 |
-
|
| 3 |
---
|
| 4 |
title: OCR_Projet05
|
| 5 |
emoji: đ„
|
|
@@ -12,6 +10,8 @@ pinned: true
|
|
| 12 |
short_description: Projet 05 formation Openclassrooms
|
| 13 |
---
|
| 14 |
|
|
|
|
|
|
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
|
@@ -76,17 +76,6 @@ Déployez un modÚle de Machine Learning
|
|
| 76 |
|
| 77 |
--------
|
| 78 |
|
| 79 |
-
---
|
| 80 |
-
title: Projet 05
|
| 81 |
-
emoji: đ
|
| 82 |
-
colorFrom: indigo
|
| 83 |
-
colorTo: green
|
| 84 |
-
sdk: gradio
|
| 85 |
-
sdk_version: 5.49.1
|
| 86 |
-
app_file: app.py
|
| 87 |
-
pinned: false
|
| 88 |
-
---
|
| 89 |
-
|
| 90 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 91 |
|
| 92 |
<!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
|
|
@@ -99,8 +88,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 99 |
*** Thanks again! Now go create something AMAZING! :D
|
| 100 |
-->
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
<!-- PROJECT SHIELDS -->
|
| 105 |
<!--
|
| 106 |
*** I'm using markdown "reference style" links for readability.
|
|
@@ -118,8 +105,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 118 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 119 |

|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
<!-- PROJECT LOGO -->
|
| 124 |
<br />
|
| 125 |
<div align="center">
|
|
@@ -143,8 +128,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 143 |
</p>
|
| 144 |
</div>
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
<!-- TABLE OF CONTENTS -->
|
| 149 |
<details>
|
| 150 |
<summary>Table of Contents</summary>
|
|
@@ -191,8 +174,6 @@ Here's a blank template to get started. To avoid retyping too much info, do a se
|
|
| 191 |
|
| 192 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
<!-- GETTING STARTED -->
|
| 197 |
## Getting Started
|
| 198 |
|
|
@@ -212,20 +193,19 @@ This is an example of how to list things you need to use the software and how to
|
|
| 212 |
pip install -r requirements.txt
|
| 213 |
uvicorn app.main:app --reload
|
| 214 |
|
| 215 |
-
1.
|
| 216 |
-
2. Clone the repo
|
| 217 |
```sh
|
| 218 |
-
git clone https://github.com/
|
| 219 |
```
|
| 220 |
-
|
| 221 |
```sh
|
| 222 |
npm install
|
| 223 |
```
|
| 224 |
-
|
| 225 |
```js
|
| 226 |
const API_KEY = 'ENTER YOUR API';
|
| 227 |
```
|
| 228 |
-
|
| 229 |
```sh
|
| 230 |
git remote set-url origin github_username/repo_name
|
| 231 |
git remote -v # confirm the changes
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: OCR_Projet05
|
| 3 |
emoji: đ„
|
|
|
|
| 10 |
short_description: Projet 05 formation Openclassrooms
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# projet_05
|
| 14 |
+
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
|
|
|
| 76 |
|
| 77 |
--------
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 80 |
|
| 81 |
<!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
|
|
|
|
| 88 |
*** Thanks again! Now go create something AMAZING! :D
|
| 89 |
-->
|
| 90 |
|
|
|
|
|
|
|
| 91 |
<!-- PROJECT SHIELDS -->
|
| 92 |
<!--
|
| 93 |
*** I'm using markdown "reference style" links for readability.
|
|
|
|
| 105 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 106 |

|
| 107 |
|
|
|
|
|
|
|
| 108 |
<!-- PROJECT LOGO -->
|
| 109 |
<br />
|
| 110 |
<div align="center">
|
|
|
|
| 128 |
</p>
|
| 129 |
</div>
|
| 130 |
|
|
|
|
|
|
|
| 131 |
<!-- TABLE OF CONTENTS -->
|
| 132 |
<details>
|
| 133 |
<summary>Table of Contents</summary>
|
|
|
|
| 174 |
|
| 175 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 176 |
|
|
|
|
|
|
|
| 177 |
<!-- GETTING STARTED -->
|
| 178 |
## Getting Started
|
| 179 |
|
|
|
|
| 193 |
pip install -r requirements.txt
|
| 194 |
uvicorn app.main:app --reload
|
| 195 |
|
| 196 |
+
1. Clone the repo
|
|
|
|
| 197 |
```sh
|
| 198 |
+
git clone https://github.com/stephmnt/OCR_Projet05.git
|
| 199 |
```
|
| 200 |
+
2. Install NPM packages
|
| 201 |
```sh
|
| 202 |
npm install
|
| 203 |
```
|
| 204 |
+
3. Enter your API in `config.js`
|
| 205 |
```js
|
| 206 |
const API_KEY = 'ENTER YOUR API';
|
| 207 |
```
|
| 208 |
+
4. Change git remote url to avoid accidental pushes to base project
|
| 209 |
```sh
|
| 210 |
git remote set-url origin github_username/repo_name
|
| 211 |
git remote -v # confirm the changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
CHANGED
|
@@ -33,8 +33,8 @@ jobs:
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
-
rsync -av --exclude '.git' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
-
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
|
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
+
rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
| 1 |
# projet_05
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 4 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 5 |
</a>
|
|
@@ -57,6 +69,11 @@ Déployez un modÚle de Machine Learning
|
|
| 57 |
âââ plots.py <- Code to create visualizations
|
| 58 |
```
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
--------
|
| 61 |
|
| 62 |
---
|
|
@@ -93,6 +110,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 93 |
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 94 |
-->
|
| 95 |
[![Contributors][contributors-shield]][contributors-url]
|
|
|
|
| 96 |
[![Forks][forks-shield]][forks-url]
|
| 97 |
[![Stargazers][stars-shield]][stars-url]
|
| 98 |
[![Issues][issues-shield]][issues-url]
|
|
@@ -236,7 +254,7 @@ _For more examples, please refer to the [Documentation](https://example.com)_
|
|
| 236 |
- [ ] Feature 3
|
| 237 |
- [ ] Nested Feature
|
| 238 |
|
| 239 |
-
See the [open issues](https://github.com/
|
| 240 |
|
| 241 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 242 |
|
|
@@ -299,18 +317,18 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 299 |
|
| 300 |
<!-- MARKDOWN LINKS & IMAGES -->
|
| 301 |
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 302 |
-
[contributors-shield]: https://img.shields.io/github/contributors/
|
| 303 |
-
[contributors-url]: https://github.com/
|
| 304 |
-
[forks-shield]: https://img.shields.io/github/forks/
|
| 305 |
-
[forks-url]: https://github.com/
|
| 306 |
-
[stars-shield]: https://img.shields.io/github/stars/
|
| 307 |
-
[stars-url]: https://github.com/
|
| 308 |
-
[issues-shield]: https://img.shields.io/github/issues/
|
| 309 |
-
[issues-url]: https://github.com/
|
| 310 |
-
[license-shield]: https://img.shields.io/github/license/
|
| 311 |
-
[license-url]: https://github.com/
|
| 312 |
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 313 |
-
[linkedin-url]: https://linkedin.com/in/
|
| 314 |
[product-screenshot]: images/screenshot.png
|
| 315 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 316 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
@@ -331,10 +349,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 331 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 332 |
[JQuery-url]: https://jquery.com
|
| 333 |
<!-- TODO: -->
|
| 334 |
-
[
|
| 335 |
-
[
|
| 336 |
-
[
|
| 337 |
-
[
|
| 338 |
-
[](#)
|
| 339 |
[](#)
|
| 340 |
-
[](#)[text](../projet_04/.gitignore)
|
|
|
|
| 1 |
# projet_05
|
| 2 |
|
| 3 |
+
---
|
| 4 |
+
title: OCR_Projet05
|
| 5 |
+
emoji: đ„
|
| 6 |
+
colorFrom: purple
|
| 7 |
+
colorTo: purple
|
| 8 |
+
sdk: gradio
|
| 9 |
+
sdk_version: 5.49.1
|
| 10 |
+
app_file: app.py
|
| 11 |
+
pinned: true
|
| 12 |
+
short_description: Projet 05 formation Openclassrooms
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
|
|
|
| 69 |
âââ plots.py <- Code to create visualizations
|
| 70 |
```
|
| 71 |
|
| 72 |
+
## Code hérité réutilisé
|
| 73 |
+
|
| 74 |
+
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 75 |
+
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 76 |
+
|
| 77 |
--------
|
| 78 |
|
| 79 |
---
|
|
|
|
| 110 |
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 111 |
-->
|
| 112 |
[![Contributors][contributors-shield]][contributors-url]
|
| 113 |
+
[![Python][python]][python]
|
| 114 |
[![Forks][forks-shield]][forks-url]
|
| 115 |
[![Stargazers][stars-shield]][stars-url]
|
| 116 |
[![Issues][issues-shield]][issues-url]
|
|
|
|
| 254 |
- [ ] Feature 3
|
| 255 |
- [ ] Nested Feature
|
| 256 |
|
| 257 |
+
See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
|
| 258 |
|
| 259 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 260 |
|
|
|
|
| 317 |
|
| 318 |
<!-- MARKDOWN LINKS & IMAGES -->
|
| 319 |
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 320 |
+
[contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 321 |
+
[contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
|
| 322 |
+
[forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 323 |
+
[forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
|
| 324 |
+
[stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 325 |
+
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 326 |
+
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 327 |
+
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
| 328 |
+
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 329 |
+
[license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
|
| 330 |
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 331 |
+
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 332 |
[product-screenshot]: images/screenshot.png
|
| 333 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 334 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
|
|
| 349 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 350 |
[JQuery-url]: https://jquery.com
|
| 351 |
<!-- TODO: -->
|
| 352 |
+
[Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
|
| 353 |
+
[Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
|
| 354 |
+
[MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
|
| 355 |
+
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
|
|
|
| 356 |
[](#)
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
The MIT License (MIT)
|
| 3 |
+
Copyright (c) 2025, Stéphane Manet
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
| 6 |
+
|
| 7 |
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
| 8 |
+
|
| 9 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 10 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#################################################################################
|
| 2 |
+
# GLOBALS #
|
| 3 |
+
#################################################################################
|
| 4 |
+
|
| 5 |
+
PROJECT_NAME = OCR_projet05
|
| 6 |
+
PYTHON_VERSION = 3.10
|
| 7 |
+
PYTHON_INTERPRETER = python
|
| 8 |
+
|
| 9 |
+
#################################################################################
|
| 10 |
+
# COMMANDS #
|
| 11 |
+
#################################################################################
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
## Install Python dependencies
|
| 15 |
+
.PHONY: requirements
|
| 16 |
+
requirements:
|
| 17 |
+
pip install -e .
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
## Delete all compiled Python files
|
| 23 |
+
.PHONY: clean
|
| 24 |
+
clean:
|
| 25 |
+
find . -type f -name "*.py[co]" -delete
|
| 26 |
+
find . -type d -name "__pycache__" -delete
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
## Lint using ruff (use `make format` to do formatting)
|
| 30 |
+
.PHONY: lint
|
| 31 |
+
lint:
|
| 32 |
+
ruff format --check
|
| 33 |
+
ruff check
|
| 34 |
+
|
| 35 |
+
## Format source code with ruff
|
| 36 |
+
.PHONY: format
|
| 37 |
+
format:
|
| 38 |
+
ruff check --fix
|
| 39 |
+
ruff format
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
## Run tests
|
| 44 |
+
.PHONY: test
|
| 45 |
+
test:
|
| 46 |
+
python -m pytest tests
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
## Set up Python interpreter environment
|
| 50 |
+
.PHONY: create_environment
|
| 51 |
+
create_environment:
|
| 52 |
+
@bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
|
| 53 |
+
@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
#################################################################################
|
| 59 |
+
# PROJECT RULES #
|
| 60 |
+
#################################################################################
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
## Make dataset
|
| 64 |
+
.PHONY: data
|
| 65 |
+
data: requirements
|
| 66 |
+
$(PYTHON_INTERPRETER) projet_05/dataset.py
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
#################################################################################
|
| 70 |
+
# Self Documenting Commands #
|
| 71 |
+
#################################################################################
|
| 72 |
+
|
| 73 |
+
.DEFAULT_GOAL := help
|
| 74 |
+
|
| 75 |
+
define PRINT_HELP_PYSCRIPT
|
| 76 |
+
import re, sys; \
|
| 77 |
+
lines = '\n'.join([line for line in sys.stdin]); \
|
| 78 |
+
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
|
| 79 |
+
print('Available rules:\n'); \
|
| 80 |
+
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
|
| 81 |
+
endef
|
| 82 |
+
export PRINT_HELP_PYSCRIPT
|
| 83 |
+
|
| 84 |
+
help:
|
| 85 |
+
@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -1,7 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
def greet(name):
|
| 4 |
-
return "Hello " + name + "!!"
|
| 5 |
|
| 6 |
-
|
| 7 |
-
demo.launch()
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
import gradio as gr
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from loguru import logger
|
| 10 |
+
|
| 11 |
+
from projet_05.branding import apply_brand_theme
|
| 12 |
+
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
|
| 13 |
+
|
| 14 |
+
MODEL_PATH = Path("models/best_model.joblib")
|
| 15 |
+
METADATA_PATH = Path("models/best_model_meta.json")
|
| 16 |
+
SCHEMA_PATH = Path("data/processed/schema.json")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _load_schema(path: Path) -> dict[str, Any]:
|
| 20 |
+
if not path.exists():
|
| 21 |
+
return {}
|
| 22 |
+
return json.loads(path.read_text(encoding="utf-8"))
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
| 26 |
+
if schema:
|
| 27 |
+
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
|
| 28 |
+
if candidates:
|
| 29 |
+
return candidates
|
| 30 |
+
features = metadata.get("features", {})
|
| 31 |
+
explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
|
| 32 |
+
if explicit:
|
| 33 |
+
return explicit
|
| 34 |
+
if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
|
| 35 |
+
return list(pipeline.feature_names_in_)
|
| 36 |
+
return []
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 40 |
+
if isinstance(payload, pd.DataFrame):
|
| 41 |
+
df = payload.copy()
|
| 42 |
+
elif payload is None:
|
| 43 |
+
df = pd.DataFrame(columns=headers)
|
| 44 |
+
else:
|
| 45 |
+
df = pd.DataFrame(payload, columns=headers if headers else None)
|
| 46 |
+
df = df.dropna(how="all")
|
| 47 |
+
if df.empty:
|
| 48 |
+
raise gr.Error("Merci de saisir au moins une ligne complĂšte.")
|
| 49 |
+
return df
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _ensure_model():
|
| 53 |
+
if PIPELINE is None:
|
| 54 |
+
raise gr.Error(
|
| 55 |
+
"Aucun modÚle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def score_table(table):
|
| 60 |
+
_ensure_model()
|
| 61 |
+
df = _convert_input(table, FEATURE_ORDER)
|
| 62 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 63 |
+
return run_inference(
|
| 64 |
+
df,
|
| 65 |
+
PIPELINE,
|
| 66 |
+
THRESHOLD,
|
| 67 |
+
drop_columns=drop_cols,
|
| 68 |
+
required_features=FEATURE_ORDER or None,
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def score_csv(upload):
|
| 73 |
+
_ensure_model()
|
| 74 |
+
if upload is None:
|
| 75 |
+
raise gr.Error("Veuillez déposer un fichier CSV.")
|
| 76 |
+
df = pd.read_csv(upload.name)
|
| 77 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 78 |
+
return run_inference(
|
| 79 |
+
df,
|
| 80 |
+
PIPELINE,
|
| 81 |
+
THRESHOLD,
|
| 82 |
+
drop_columns=drop_cols,
|
| 83 |
+
required_features=FEATURE_ORDER or None,
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def predict_from_form(*values):
|
| 88 |
+
_ensure_model()
|
| 89 |
+
if not FEATURE_ORDER:
|
| 90 |
+
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
| 91 |
+
payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
|
| 92 |
+
df = pd.DataFrame([payload])
|
| 93 |
+
scored = run_inference(
|
| 94 |
+
df,
|
| 95 |
+
PIPELINE,
|
| 96 |
+
THRESHOLD,
|
| 97 |
+
required_features=FEATURE_ORDER or None,
|
| 98 |
+
)
|
| 99 |
+
row = scored.iloc[0]
|
| 100 |
+
label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
|
| 101 |
+
return {
|
| 102 |
+
"probability": round(float(row["proba_depart"]), 4),
|
| 103 |
+
"decision": label,
|
| 104 |
+
"threshold": THRESHOLD,
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Chargement des artéfacts
|
| 109 |
+
apply_brand_theme()
|
| 110 |
+
|
| 111 |
+
PIPELINE = None
|
| 112 |
+
METADATA: dict[str, Any] = {}
|
| 113 |
+
THRESHOLD = 0.5
|
| 114 |
+
TARGET_COLUMN: str | None = None
|
| 115 |
+
SCHEMA = _load_schema(SCHEMA_PATH)
|
| 116 |
+
|
| 117 |
+
try:
|
| 118 |
+
PIPELINE = load_pipeline(MODEL_PATH)
|
| 119 |
+
METADATA = load_metadata(METADATA_PATH)
|
| 120 |
+
THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
|
| 121 |
+
TARGET_COLUMN = METADATA.get("target")
|
| 122 |
+
except FileNotFoundError as exc:
|
| 123 |
+
logger.warning("Artéfact manquant: {}", exc)
|
| 124 |
+
|
| 125 |
+
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
|
| 126 |
+
|
| 127 |
+
with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
| 128 |
+
gr.Markdown("# API Gradio â PrĂ©diction de dĂ©part employĂ©")
|
| 129 |
+
gr.Markdown(
|
| 130 |
+
"Le modÚle applique le pipeline entraßné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
if PIPELINE is None:
|
| 134 |
+
gr.Markdown(
|
| 135 |
+
"â ïž **Aucun modĂšle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
|
| 136 |
+
)
|
| 137 |
+
else:
|
| 138 |
+
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
| 139 |
+
|
| 140 |
+
with gr.Tab("Formulaire unitaire"):
|
| 141 |
+
if not FEATURE_ORDER:
|
| 142 |
+
gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
|
| 143 |
+
else:
|
| 144 |
+
form_inputs: list[gr.components.Component] = [] # type: ignore
|
| 145 |
+
for feature in FEATURE_ORDER:
|
| 146 |
+
form_inputs.append(
|
| 147 |
+
gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
|
| 148 |
+
)
|
| 149 |
+
form_output = gr.JSON(label="Résultat")
|
| 150 |
+
gr.Button("Prédire").click(
|
| 151 |
+
fn=predict_from_form,
|
| 152 |
+
inputs=form_inputs,
|
| 153 |
+
outputs=form_output,
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
with gr.Tab("Tableau interactif"):
|
| 157 |
+
table_input = gr.Dataframe(
|
| 158 |
+
headers=FEATURE_ORDER if FEATURE_ORDER else None,
|
| 159 |
+
row_count=(1, "dynamic"),
|
| 160 |
+
col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
|
| 161 |
+
type="pandas",
|
| 162 |
+
)
|
| 163 |
+
table_output = gr.Dataframe(label="Prédictions", type="pandas")
|
| 164 |
+
gr.Button("Scorer les lignes").click(
|
| 165 |
+
fn=score_table,
|
| 166 |
+
inputs=table_input,
|
| 167 |
+
outputs=table_output,
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
with gr.Tab("Fichier CSV"):
|
| 171 |
+
file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
|
| 172 |
+
file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
|
| 173 |
+
gr.Button("Scorer le fichier").click(
|
| 174 |
+
fn=score_csv,
|
| 175 |
+
inputs=file_input,
|
| 176 |
+
outputs=file_output,
|
| 177 |
+
)
|
| 178 |
|
|
|
|
|
|
|
| 179 |
|
| 180 |
+
if __name__ == "__main__":
|
| 181 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
CHANGED
|
@@ -1,10 +1,13 @@
|
|
| 1 |
-
name:
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
- main
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
jobs:
|
| 9 |
deploy:
|
| 10 |
runs-on: ubuntu-latest
|
|
@@ -23,7 +26,7 @@ jobs:
|
|
| 23 |
python -m pip install --upgrade pip
|
| 24 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 25 |
|
| 26 |
-
- name:
|
| 27 |
env:
|
| 28 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 29 |
run: |
|
|
@@ -33,5 +36,5 @@ jobs:
|
|
| 33 |
rsync -av --exclude '.git' ./ hf_space/
|
| 34 |
cd hf_space
|
| 35 |
git add .
|
| 36 |
-
git commit -m "đ Auto-deploy from GitHub Actions"
|
| 37 |
-
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
|
|
|
| 1 |
+
name: Deploy to Hugging Face Spaces
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
- main
|
| 7 |
|
| 8 |
+
permissions:
|
| 9 |
+
contents: write
|
| 10 |
+
|
| 11 |
jobs:
|
| 12 |
deploy:
|
| 13 |
runs-on: ubuntu-latest
|
|
|
|
| 26 |
python -m pip install --upgrade pip
|
| 27 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 28 |
|
| 29 |
+
- name: Deploy to Hugging Face Space
|
| 30 |
env:
|
| 31 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 32 |
run: |
|
|
|
|
| 36 |
rsync -av --exclude '.git' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
+
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -1,2 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.code-workspace
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data
|
| 2 |
+
/data/
|
| 3 |
+
|
| 4 |
+
# Mac OS-specific storage files
|
| 5 |
+
.DS_Store
|
| 6 |
*.code-workspace
|
| 7 |
+
|
| 8 |
+
# vim
|
| 9 |
+
*.swp
|
| 10 |
+
*.swo
|
| 11 |
+
|
| 12 |
+
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 13 |
+
|
| 14 |
+
# Byte-compiled / optimized / DLL files
|
| 15 |
+
__pycache__/
|
| 16 |
+
*.py[cod]
|
| 17 |
+
*$py.class
|
| 18 |
+
|
| 19 |
+
# C extensions
|
| 20 |
+
*.so
|
| 21 |
+
|
| 22 |
+
# Distribution / packaging
|
| 23 |
+
.Python
|
| 24 |
+
build/
|
| 25 |
+
develop-eggs/
|
| 26 |
+
dist/
|
| 27 |
+
downloads/
|
| 28 |
+
eggs/
|
| 29 |
+
.eggs/
|
| 30 |
+
lib/
|
| 31 |
+
lib64/
|
| 32 |
+
parts/
|
| 33 |
+
sdist/
|
| 34 |
+
var/
|
| 35 |
+
wheels/
|
| 36 |
+
share/python-wheels/
|
| 37 |
+
*.egg-info/
|
| 38 |
+
.installed.cfg
|
| 39 |
+
*.egg
|
| 40 |
+
MANIFEST
|
| 41 |
+
|
| 42 |
+
# PyInstaller
|
| 43 |
+
# Usually these files are written by a python script from a template
|
| 44 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 45 |
+
*.manifest
|
| 46 |
+
*.spec
|
| 47 |
+
|
| 48 |
+
# Installer logs
|
| 49 |
+
pip-log.txt
|
| 50 |
+
pip-delete-this-directory.txt
|
| 51 |
+
|
| 52 |
+
# Unit test / coverage reports
|
| 53 |
+
htmlcov/
|
| 54 |
+
.tox/
|
| 55 |
+
.nox/
|
| 56 |
+
.coverage
|
| 57 |
+
.coverage.*
|
| 58 |
+
.cache
|
| 59 |
+
nosetests.xml
|
| 60 |
+
coverage.xml
|
| 61 |
+
*.cover
|
| 62 |
+
*.py,cover
|
| 63 |
+
.hypothesis/
|
| 64 |
+
.pytest_cache/
|
| 65 |
+
cover/
|
| 66 |
+
|
| 67 |
+
# Translations
|
| 68 |
+
*.mo
|
| 69 |
+
*.pot
|
| 70 |
+
|
| 71 |
+
# Django stuff:
|
| 72 |
+
*.log
|
| 73 |
+
local_settings.py
|
| 74 |
+
db.sqlite3
|
| 75 |
+
db.sqlite3-journal
|
| 76 |
+
|
| 77 |
+
# Flask stuff:
|
| 78 |
+
instance/
|
| 79 |
+
.webassets-cache
|
| 80 |
+
|
| 81 |
+
# Scrapy stuff:
|
| 82 |
+
.scrapy
|
| 83 |
+
|
| 84 |
+
# MkDocs documentation
|
| 85 |
+
docs/site/
|
| 86 |
+
|
| 87 |
+
# PyBuilder
|
| 88 |
+
.pybuilder/
|
| 89 |
+
target/
|
| 90 |
+
|
| 91 |
+
# Jupyter Notebook
|
| 92 |
+
.ipynb_checkpoints
|
| 93 |
+
|
| 94 |
+
# IPython
|
| 95 |
+
profile_default/
|
| 96 |
+
ipython_config.py
|
| 97 |
+
|
| 98 |
+
# pyenv
|
| 99 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 100 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 101 |
+
# .python-version
|
| 102 |
+
|
| 103 |
+
# pipenv
|
| 104 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 105 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 106 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 107 |
+
# install all needed dependencies.
|
| 108 |
+
#Pipfile.lock
|
| 109 |
+
|
| 110 |
+
# UV
|
| 111 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 112 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 113 |
+
# commonly ignored for libraries.
|
| 114 |
+
#uv.lock
|
| 115 |
+
|
| 116 |
+
# poetry
|
| 117 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 118 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 119 |
+
# commonly ignored for libraries.
|
| 120 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 121 |
+
#poetry.lock
|
| 122 |
+
|
| 123 |
+
# pdm
|
| 124 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 125 |
+
#pdm.lock
|
| 126 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 127 |
+
# in version control.
|
| 128 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 129 |
+
.pdm.toml
|
| 130 |
+
.pdm-python
|
| 131 |
+
.pdm-build/
|
| 132 |
+
|
| 133 |
+
# pixi
|
| 134 |
+
# pixi.lock should be committed to version control for reproducibility
|
| 135 |
+
# .pixi/ contains the environments and should not be committed
|
| 136 |
+
.pixi/
|
| 137 |
+
|
| 138 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 139 |
+
__pypackages__/
|
| 140 |
+
|
| 141 |
+
# Celery stuff
|
| 142 |
+
celerybeat-schedule
|
| 143 |
+
celerybeat.pid
|
| 144 |
+
|
| 145 |
+
# SageMath parsed files
|
| 146 |
+
*.sage.py
|
| 147 |
+
|
| 148 |
+
# Environments
|
| 149 |
+
.env
|
| 150 |
+
.venv
|
| 151 |
+
env/
|
| 152 |
+
venv/
|
| 153 |
+
ENV/
|
| 154 |
+
env.bak/
|
| 155 |
+
venv.bak/
|
| 156 |
+
|
| 157 |
+
# Spyder project settings
|
| 158 |
+
.spyderproject
|
| 159 |
+
.spyproject
|
| 160 |
+
|
| 161 |
+
# Rope project settings
|
| 162 |
+
.ropeproject
|
| 163 |
+
|
| 164 |
+
# mkdocs documentation
|
| 165 |
+
/site
|
| 166 |
+
|
| 167 |
+
# mypy
|
| 168 |
+
.mypy_cache/
|
| 169 |
+
.dmypy.json
|
| 170 |
+
dmypy.json
|
| 171 |
+
|
| 172 |
+
# Pyre type checker
|
| 173 |
+
.pyre/
|
| 174 |
+
|
| 175 |
+
# pytype static type analyzer
|
| 176 |
+
.pytype/
|
| 177 |
+
|
| 178 |
+
# Cython debug symbols
|
| 179 |
+
cython_debug/
|
| 180 |
+
|
| 181 |
+
# PyCharm
|
| 182 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 183 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 184 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 185 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 186 |
+
#.idea/
|
| 187 |
+
|
| 188 |
+
# Ruff stuff:
|
| 189 |
+
.ruff_cache/
|
| 190 |
+
|
| 191 |
+
# PyPI configuration file
|
| 192 |
+
.pypirc
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,3 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Projet 05
|
| 3 |
emoji: đ
|
|
@@ -10,3 +71,270 @@ pinned: false
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# projet_05
|
| 2 |
+
|
| 3 |
+
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 4 |
+
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 5 |
+
</a>
|
| 6 |
+
|
| 7 |
+
Déployez un modÚle de Machine Learning
|
| 8 |
+
|
| 9 |
+
## Organisation du projet
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
âââ LICENSE <- Open-source license if one is chosen
|
| 13 |
+
âââ Makefile <- Makefile with convenience commands like `make data` or `make train`
|
| 14 |
+
âââ README.md <- The top-level README for developers using this project.
|
| 15 |
+
âââ data
|
| 16 |
+
â âââ external <- Data from third party sources.
|
| 17 |
+
â âââ interim <- Intermediate data that has been transformed.
|
| 18 |
+
â âââ processed <- The final, canonical data sets for modeling.
|
| 19 |
+
â âââ raw <- The original, immutable data dump.
|
| 20 |
+
â
|
| 21 |
+
âââ docs <- A default mkdocs project; see www.mkdocs.org for details
|
| 22 |
+
â
|
| 23 |
+
âââ models <- Trained and serialized models, model predictions, or model summaries
|
| 24 |
+
â
|
| 25 |
+
âââ notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
|
| 26 |
+
â the creator's initials, and a short `-` delimited description, e.g.
|
| 27 |
+
â `1.0-jqp-initial-data-exploration`.
|
| 28 |
+
â
|
| 29 |
+
âââ pyproject.toml <- Project configuration file with package metadata for
|
| 30 |
+
â projet_05 and configuration for tools like black
|
| 31 |
+
â
|
| 32 |
+
âââ references <- Data dictionaries, manuals, and all other explanatory materials.
|
| 33 |
+
â
|
| 34 |
+
âââ reports <- Generated analysis as HTML, PDF, LaTeX, etc.
|
| 35 |
+
â âââ figures <- Generated graphics and figures to be used in reporting
|
| 36 |
+
â
|
| 37 |
+
âââ requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
| 38 |
+
â generated with `pip freeze > requirements.txt`
|
| 39 |
+
â
|
| 40 |
+
âââ setup.cfg <- Configuration file for flake8
|
| 41 |
+
â
|
| 42 |
+
âââ projet_05 <- Source code for use in this project.
|
| 43 |
+
â
|
| 44 |
+
âââ __init__.py <- Makes projet_05 a Python module
|
| 45 |
+
â
|
| 46 |
+
âââ config.py <- Store useful variables and configuration
|
| 47 |
+
â
|
| 48 |
+
âââ dataset.py <- Scripts to download or generate data
|
| 49 |
+
â
|
| 50 |
+
âââ features.py <- Code to create features for modeling
|
| 51 |
+
â
|
| 52 |
+
âââ modeling
|
| 53 |
+
â âââ __init__.py
|
| 54 |
+
â âââ predict.py <- Code to run model inference with trained models
|
| 55 |
+
â âââ train.py <- Code to train models
|
| 56 |
+
â
|
| 57 |
+
âââ plots.py <- Code to create visualizations
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
--------
|
| 61 |
+
|
| 62 |
---
|
| 63 |
title: Projet 05
|
| 64 |
emoji: đ
|
|
|
|
| 71 |
---
|
| 72 |
|
| 73 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 74 |
+
|
| 75 |
+
<!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
|
| 76 |
+
<a id="readme-top"></a>
|
| 77 |
+
<!--
|
| 78 |
+
*** Thanks for checking out the Best-README-Template. If you have a suggestion
|
| 79 |
+
*** that would make this better, please fork the repo and create a pull request
|
| 80 |
+
*** or simply open an issue with the tag "enhancement".
|
| 81 |
+
*** Don't forget to give the project a star!
|
| 82 |
+
*** Thanks again! Now go create something AMAZING! :D
|
| 83 |
+
-->
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
<!-- PROJECT SHIELDS -->
|
| 88 |
+
<!--
|
| 89 |
+
*** I'm using markdown "reference style" links for readability.
|
| 90 |
+
*** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
|
| 91 |
+
*** See the bottom of this document for the declaration of the reference variables
|
| 92 |
+
*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
|
| 93 |
+
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 94 |
+
-->
|
| 95 |
+
[![Contributors][contributors-shield]][contributors-url]
|
| 96 |
+
[![Forks][forks-shield]][forks-url]
|
| 97 |
+
[![Stargazers][stars-shield]][stars-url]
|
| 98 |
+
[![Issues][issues-shield]][issues-url]
|
| 99 |
+
[![project_license][license-shield]][license-url]
|
| 100 |
+
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 101 |
+

|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
<!-- PROJECT LOGO -->
|
| 106 |
+
<br />
|
| 107 |
+
<div align="center">
|
| 108 |
+
<a href="https://github.com/github_username/repo_name">
|
| 109 |
+
<img src="images/logo.png" alt="Logo" width="80" height="80">
|
| 110 |
+
</a>
|
| 111 |
+
|
| 112 |
+
<h3 align="center">project_title</h3>
|
| 113 |
+
|
| 114 |
+
<p align="center">
|
| 115 |
+
project_description
|
| 116 |
+
<br />
|
| 117 |
+
<a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
|
| 118 |
+
<br />
|
| 119 |
+
<br />
|
| 120 |
+
<a href="https://github.com/github_username/repo_name">View Demo</a>
|
| 121 |
+
·
|
| 122 |
+
<a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
|
| 123 |
+
·
|
| 124 |
+
<a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
|
| 125 |
+
</p>
|
| 126 |
+
</div>
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
<!-- TABLE OF CONTENTS -->
|
| 131 |
+
<details>
|
| 132 |
+
<summary>Table of Contents</summary>
|
| 133 |
+
<ol>
|
| 134 |
+
<li>
|
| 135 |
+
<a href="#about-the-project">About The Project</a>
|
| 136 |
+
<ul>
|
| 137 |
+
<li><a href="#built-with">Built With</a></li>
|
| 138 |
+
</ul>
|
| 139 |
+
</li>
|
| 140 |
+
<li>
|
| 141 |
+
<a href="#getting-started">Getting Started</a>
|
| 142 |
+
<ul>
|
| 143 |
+
<li><a href="#prerequisites">Prerequisites</a></li>
|
| 144 |
+
<li><a href="#installation">Installation</a></li>
|
| 145 |
+
</ul>
|
| 146 |
+
</li>
|
| 147 |
+
<li><a href="#usage">Usage</a></li>
|
| 148 |
+
<li><a href="#roadmap">Roadmap</a></li>
|
| 149 |
+
<li><a href="#contributing">Contributing</a></li>
|
| 150 |
+
<li><a href="#license">License</a></li>
|
| 151 |
+
<li><a href="#contact">Contact</a></li>
|
| 152 |
+
<li><a href="#acknowledgments">Acknowledgments</a></li>
|
| 153 |
+
</ol>
|
| 154 |
+
</details>
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
<!-- ABOUT THE PROJECT -->
|
| 159 |
+
## About The Project
|
| 160 |
+
|
| 161 |
+
[![Product Name Screen Shot][product-screenshot]](https://example.com)
|
| 162 |
+
|
| 163 |
+
Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
|
| 164 |
+
|
| 165 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
### Built With
|
| 170 |
+
|
| 171 |
+
* [![Python][Python]][Python-url]
|
| 172 |
+
* [![SQL][SQL]][SQL-url]
|
| 173 |
+
|
| 174 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
<!-- GETTING STARTED -->
|
| 179 |
+
## Getting Started
|
| 180 |
+
|
| 181 |
+
This is an example of how you may give instructions on setting up your project locally.
|
| 182 |
+
To get a local copy up and running follow these simple example steps.
|
| 183 |
+
|
| 184 |
+
### Prerequisites
|
| 185 |
+
|
| 186 |
+
This is an example of how to list things you need to use the software and how to install them.
|
| 187 |
+
* npm
|
| 188 |
+
```sh
|
| 189 |
+
npm install npm@latest -g
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
### Installation
|
| 193 |
+
|
| 194 |
+
pip install -r requirements.txt
|
| 195 |
+
uvicorn app.main:app --reload
|
| 196 |
+
|
| 197 |
+
1. Get a free API Key at [https://example.com](https://example.com)
|
| 198 |
+
2. Clone the repo
|
| 199 |
+
```sh
|
| 200 |
+
git clone https://github.com/github_username/repo_name.git
|
| 201 |
+
```
|
| 202 |
+
3. Install NPM packages
|
| 203 |
+
```sh
|
| 204 |
+
npm install
|
| 205 |
+
```
|
| 206 |
+
4. Enter your API in `config.js`
|
| 207 |
+
```js
|
| 208 |
+
const API_KEY = 'ENTER YOUR API';
|
| 209 |
+
```
|
| 210 |
+
5. Change git remote url to avoid accidental pushes to base project
|
| 211 |
+
```sh
|
| 212 |
+
git remote set-url origin github_username/repo_name
|
| 213 |
+
git remote -v # confirm the changes
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
<!-- USAGE EXAMPLES -->
|
| 221 |
+
## Usage
|
| 222 |
+
|
| 223 |
+
Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
|
| 224 |
+
|
| 225 |
+
_For more examples, please refer to the [Documentation](https://example.com)_
|
| 226 |
+
|
| 227 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
<!-- ROADMAP -->
|
| 232 |
+
## Roadmap
|
| 233 |
+
|
| 234 |
+
- [ ] Feature 1
|
| 235 |
+
- [ ] Feature 2
|
| 236 |
+
- [ ] Feature 3
|
| 237 |
+
- [ ] Nested Feature
|
| 238 |
+
|
| 239 |
+
See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
|
| 240 |
+
|
| 241 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
<!-- CONTRIBUTING -->
|
| 246 |
+
## Contributing
|
| 247 |
+
|
| 248 |
+
Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
|
| 249 |
+
|
| 250 |
+
If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
|
| 251 |
+
Don't forget to give the project a star! Thanks again!
|
| 252 |
+
|
| 253 |
+
1. Fork the Project
|
| 254 |
+
2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
|
| 255 |
+
3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
|
| 256 |
+
4. Push to the Branch (`git push origin feature/AmazingFeature`)
|
| 257 |
+
5. Open a Pull Request
|
| 258 |
+
|
| 259 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 260 |
+
|
| 261 |
+
### Top contributors:
|
| 262 |
+
|
| 263 |
+
<a href="https://github.com/github_username/repo_name/graphs/contributors">
|
| 264 |
+
<img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
|
| 265 |
+
</a>
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
<!-- LICENSE -->
|
| 270 |
+
## License
|
| 271 |
+
|
| 272 |
+
Distributed under the project_license. See `LICENSE.txt` for more information.
|
| 273 |
+
|
| 274 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
<!-- CONTACT -->
|
| 279 |
+
## Contact
|
| 280 |
+
|
| 281 |
+
Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
|
| 282 |
+
|
| 283 |
+
Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
|
| 284 |
+
|
| 285 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
<!-- ACKNOWLEDGMENTS -->
|
| 290 |
+
## Acknowledgments
|
| 291 |
+
|
| 292 |
+
* []()
|
| 293 |
+
* []()
|
| 294 |
+
* []()
|
| 295 |
+
|
| 296 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
<!-- MARKDOWN LINKS & IMAGES -->
|
| 301 |
+
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 302 |
+
[contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
|
| 303 |
+
[contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
|
| 304 |
+
[forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
|
| 305 |
+
[forks-url]: https://github.com/github_username/repo_name/network/members
|
| 306 |
+
[stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
|
| 307 |
+
[stars-url]: https://github.com/github_username/repo_name/stargazers
|
| 308 |
+
[issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
|
| 309 |
+
[issues-url]: https://github.com/github_username/repo_name/issues
|
| 310 |
+
[license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
|
| 311 |
+
[license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
|
| 312 |
+
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 313 |
+
[linkedin-url]: https://linkedin.com/in/linkedin_username
|
| 314 |
+
[product-screenshot]: images/screenshot.png
|
| 315 |
+
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 316 |
+
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
| 317 |
+
[Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
|
| 318 |
+
[Next-url]: https://nextjs.org/
|
| 319 |
+
[React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
|
| 320 |
+
[React-url]: https://reactjs.org/
|
| 321 |
+
[Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
|
| 322 |
+
[Vue-url]: https://vuejs.org/
|
| 323 |
+
[Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
|
| 324 |
+
[Angular-url]: https://angular.io/
|
| 325 |
+
[Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
|
| 326 |
+
[Svelte-url]: https://svelte.dev/
|
| 327 |
+
[Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
|
| 328 |
+
[Laravel-url]: https://laravel.com
|
| 329 |
+
[Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
|
| 330 |
+
[Bootstrap-url]: https://getbootstrap.com
|
| 331 |
+
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 332 |
+
[JQuery-url]: https://jquery.com
|
| 333 |
+
<!-- TODO: -->
|
| 334 |
+
[](#)
|
| 335 |
+
[](#)
|
| 336 |
+
[](#)
|
| 337 |
+
[](#)
|
| 338 |
+
[](#)
|
| 339 |
+
[](#)
|
| 340 |
+
[](#)[text](../projet_04/.gitignore)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def greet(name):
|
| 4 |
+
return "Hello " + name + "!!"
|
| 5 |
+
|
| 6 |
+
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
| 7 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Déployer vers Hugging Face Spaces
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
deploy:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
steps:
|
| 13 |
+
- name: Checkout repository
|
| 14 |
+
uses: actions/checkout@v4
|
| 15 |
+
|
| 16 |
+
- name: Setup Python
|
| 17 |
+
uses: actions/setup-python@v5
|
| 18 |
+
with:
|
| 19 |
+
python-version: "3.10"
|
| 20 |
+
|
| 21 |
+
- name: Install dependencies
|
| 22 |
+
run: |
|
| 23 |
+
python -m pip install --upgrade pip
|
| 24 |
+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 25 |
+
|
| 26 |
+
- name: Push to Hugging Face Space
|
| 27 |
+
env:
|
| 28 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 29 |
+
run: |
|
| 30 |
+
git config --global user.email "actions@github.com"
|
| 31 |
+
git config --global user.name "GitHub Actions"
|
| 32 |
+
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 33 |
+
rsync -av --exclude '.git' ./ hf_space/
|
| 34 |
+
cd hf_space
|
| 35 |
+
git add .
|
| 36 |
+
git commit -m "đ Auto-deploy from GitHub Actions"
|
| 37 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.code-workspace
|
| 2 |
+
.venv/
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Projet 05
|
| 3 |
+
emoji: đ
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.49.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def greet(name):
|
| 4 |
+
return "Hello " + name + "!!"
|
| 5 |
+
|
| 6 |
+
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
| 7 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from app.main import greet
|
| 3 |
+
|
| 4 |
+
def test_greet_returns_string():
|
| 5 |
+
"""Vérifie que la fonction retourne bien une chaßne de caractÚres."""
|
| 6 |
+
result = greet("Alice")
|
| 7 |
+
assert isinstance(result, str), "Le rĂ©sultat doit ĂȘtre une chaĂźne de caractĂšres."
|
| 8 |
+
|
| 9 |
+
def test_greet_output_content():
|
| 10 |
+
"""Vérifie que la fonction génÚre la phrase attendue."""
|
| 11 |
+
result = greet("Bob")
|
| 12 |
+
assert result == "Hello Bob!!", f"Résultat inattendu : {result}"
|
| 13 |
+
|
| 14 |
+
def test_greet_with_empty_string():
|
| 15 |
+
"""VĂ©rifie le comportement si lâentrĂ©e est vide."""
|
| 16 |
+
result = greet("")
|
| 17 |
+
assert result == "Hello !!", "Le résultat doit gérer les entrées vides."
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[virtualenvs]
|
| 2 |
+
in-project = true
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from projet_05 import config # noqa: F401
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from loguru import logger
|
| 5 |
+
|
| 6 |
+
# Load environment variables from .env file if it exists
|
| 7 |
+
load_dotenv()
|
| 8 |
+
|
| 9 |
+
# Paths
|
| 10 |
+
PROJ_ROOT = Path(__file__).resolve().parents[1]
|
| 11 |
+
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
|
| 12 |
+
|
| 13 |
+
DATA_DIR = PROJ_ROOT / "data"
|
| 14 |
+
RAW_DATA_DIR = DATA_DIR / "raw"
|
| 15 |
+
INTERIM_DATA_DIR = DATA_DIR / "interim"
|
| 16 |
+
PROCESSED_DATA_DIR = DATA_DIR / "processed"
|
| 17 |
+
EXTERNAL_DATA_DIR = DATA_DIR / "external"
|
| 18 |
+
|
| 19 |
+
MODELS_DIR = PROJ_ROOT / "models"
|
| 20 |
+
|
| 21 |
+
REPORTS_DIR = PROJ_ROOT / "reports"
|
| 22 |
+
FIGURES_DIR = REPORTS_DIR / "figures"
|
| 23 |
+
|
| 24 |
+
# If tqdm is installed, configure loguru with tqdm.write
|
| 25 |
+
# https://github.com/Delgan/loguru/issues/135
|
| 26 |
+
try:
|
| 27 |
+
from tqdm import tqdm
|
| 28 |
+
|
| 29 |
+
logger.remove(0)
|
| 30 |
+
logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
|
| 31 |
+
except ModuleNotFoundError:
|
| 32 |
+
pass
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@app.command()
|
| 13 |
+
def main(
|
| 14 |
+
# ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
|
| 15 |
+
input_path: Path = RAW_DATA_DIR / "dataset.csv",
|
| 16 |
+
output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
|
| 17 |
+
# ----------------------------------------------
|
| 18 |
+
):
|
| 19 |
+
# ---- REPLACE THIS WITH YOUR OWN CODE ----
|
| 20 |
+
logger.info("Processing dataset...")
|
| 21 |
+
for i in tqdm(range(10), total=10):
|
| 22 |
+
if i == 5:
|
| 23 |
+
logger.info("Something happened for iteration 5.")
|
| 24 |
+
logger.success("Processing dataset complete.")
|
| 25 |
+
# -----------------------------------------
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
if __name__ == "__main__":
|
| 29 |
+
app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real feature engineering."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating features from dataset...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Features generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real inference code."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Performing inference for model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Inference complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "features.csv",
    labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real training code."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Training some model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Modeling training complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import FIGURES_DIR, PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = FIGURES_DIR / "plot.png",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real plotting code."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating plot from data...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Plot generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["flit_core >=3.2,<4"]
|
| 3 |
+
build-backend = "flit_core.buildapi"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "projet_05"
|
| 7 |
+
version = "0.0.1"
|
| 8 |
+
description = "D\u00e9ployez un mod\u00e8le de Machine Learning"
|
| 9 |
+
authors = [
|
| 10 |
+
{ name = "St\u00e9phane Manet" },
|
| 11 |
+
]
|
| 12 |
+
license = { file = "LICENSE" }
|
| 13 |
+
readme = "README.md"
|
| 14 |
+
classifiers = [
|
| 15 |
+
"Programming Language :: Python :: 3",
|
| 16 |
+
"License :: OSI Approved :: MIT License"
|
| 17 |
+
]
|
| 18 |
+
dependencies = [
|
| 19 |
+
"loguru",
|
| 20 |
+
"mkdocs",
|
| 21 |
+
"pip",
|
| 22 |
+
"pytest",
|
| 23 |
+
"python-dotenv",
|
| 24 |
+
"ruff",
|
| 25 |
+
"tqdm",
|
| 26 |
+
"typer",
|
| 27 |
+
"imbalanced-learn (>=0.14.0,<0.15.0)",
|
| 28 |
+
"scikit-learn (>=1.4.2,<2.0.0)",
|
| 29 |
+
"matplotlib (>=3.10.7,<4.0.0)",
|
| 30 |
+
"numpy (>=2.3.4,<3.0.0)",
|
| 31 |
+
"pandas (>=2.3.3,<3.0.0)",
|
| 32 |
+
"pyyaml (>=6.0.3,<7.0.0)",
|
| 33 |
+
"scipy (>=1.16.3,<2.0.0)",
|
| 34 |
+
"seaborn (>=0.13.2,<0.14.0)",
|
| 35 |
+
"shap (>=0.49.1,<0.50.0)",
|
| 36 |
+
"gradio (>=5.49.1,<6.0.0)",
|
| 37 |
+
"joblib (>=1.4.2,<2.0.0)"
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
requires-python = ">=3.11,<3.13"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
[tool.ruff]
|
| 44 |
+
line-length = 99
|
| 45 |
+
src = ["projet_05"]
|
| 46 |
+
include = ["pyproject.toml", "projet_05/**/*.py"]
|
| 47 |
+
|
| 48 |
+
[tool.ruff.lint]
|
| 49 |
+
extend-select = ["I"] # Add import sorting
|
| 50 |
+
|
| 51 |
+
[tool.ruff.lint.isort]
|
| 52 |
+
known-first-party = ["projet_05"]
|
| 53 |
+
force-sort-within-sections = true
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def test_code_is_tested():
    """Deliberately failing placeholder.

    Keeps CI red until real tests are written for the dataset, features
    and modeling steps. Replace this with actual assertions.
    """
    assert False
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py
CHANGED
|
@@ -1 +1,4 @@
|
|
| 1 |
from projet_05 import config # noqa: F401
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from projet_05 import config # noqa: F401
|
| 2 |
+
from projet_05.settings import Settings, load_settings # noqa: F401
|
| 3 |
+
|
| 4 |
+
__all__ = ["config", "Settings", "load_settings"]
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from functools import lru_cache
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Union
|
| 6 |
+
|
| 7 |
+
from scripts_projet04.brand.brand import ( # type: ignore[import-not-found]
|
| 8 |
+
Theme,
|
| 9 |
+
ThemeConfig,
|
| 10 |
+
configure_brand,
|
| 11 |
+
load_brand,
|
| 12 |
+
make_diverging_cmap,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
# Repository root: two levels up from this file (projet_05/branding.py).
ROOT_DIR = Path(__file__).resolve().parents[1]
# Brand definition reused from projet 04's scripts directory.
DEFAULT_BRAND_PATH = ROOT_DIR / "scripts_projet04" / "brand" / "brand.yml"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _resolve_path(path: Union[str, Path, None]) -> Path:
|
| 20 |
+
if path is None:
|
| 21 |
+
return DEFAULT_BRAND_PATH
|
| 22 |
+
return Path(path).expanduser().resolve()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@lru_cache(maxsize=1)
def load_brand_config(path: Union[str, Path, None] = None) -> ThemeConfig:
    """Load the brand YAML once and return the parsed ThemeConfig."""
    # maxsize=1: calling with a second, different path evicts the first
    # entry — presumably only one brand file is used per process (verify).
    cfg_path = _resolve_path(path)
    return load_brand(cfg_path)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@lru_cache(maxsize=1)
def apply_brand_theme(path: Union[str, Path, None] = None) -> ThemeConfig:
    """
    Apply the OpenClassrooms/TechNova brand theme globally.

    Returns the ThemeConfig so callers can inspect colors if needed.
    """
    # lru_cache(maxsize=1) makes this effectively run-once per path:
    # repeated calls with the same argument skip re-applying the theme.
    cfg_path = _resolve_path(path)
    cfg = configure_brand(cfg_path)
    # NOTE(review): Theme.apply() presumably mutates global plotting state
    # (it comes from scripts_projet04.brand) — confirm it is idempotent.
    Theme.apply()
    return cfg
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
__all__ = [
|
| 46 |
+
"Theme",
|
| 47 |
+
"ThemeConfig",
|
| 48 |
+
"apply_brand_theme",
|
| 49 |
+
"load_brand_config",
|
| 50 |
+
"make_diverging_cmap",
|
| 51 |
+
"DEFAULT_BRAND_PATH",
|
| 52 |
+
]
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py
CHANGED
|
@@ -1,28 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
|
|
|
|
|
|
| 3 |
from loguru import logger
|
| 4 |
-
from tqdm import tqdm
|
| 5 |
import typer
|
| 6 |
|
| 7 |
-
from projet_05.config import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@app.command()
|
| 13 |
def main(
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
):
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
logger.success("Processing dataset complete.")
|
| 25 |
-
# -----------------------------------------
|
| 26 |
|
| 27 |
|
| 28 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sqlite3
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
from loguru import logger
|
|
|
|
| 9 |
import typer
|
| 10 |
|
| 11 |
+
from projet_05.config import INTERIM_DATA_DIR
|
| 12 |
+
from projet_05.settings import Settings, load_settings
|
| 13 |
+
|
| 14 |
+
app = typer.Typer(help="Préparation et fusion des données sources.")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ---------------------------------------------------------------------------
|
| 18 |
+
# Utilitaires
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
def safe_read_csv(path: Path, *, dtype=None) -> pd.DataFrame:
    """Read *path* as CSV; on any failure, log it and return an empty frame."""
    try:
        logger.info("Lecture du fichier {}", path)
        return pd.read_csv(path, dtype=dtype)
    except FileNotFoundError:
        logger.warning("Fichier absent: {}", path)
    except Exception as exc:  # pragma: no cover - log + empty dataframe
        logger.error("Impossible de lire {} ({})", path, exc)
    # Shared fallback for both error branches.
    return pd.DataFrame()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def clean_text_values(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize textual values that commonly encode missing data.

    Known "unknown" tokens are mapped to NaN, then every object column is
    converted to the pandas ``string`` dtype and stripped of surrounding
    whitespace. The input frame is not modified.
    """
    replace_tokens = [
        "",
        " ",
        "  ",
        "   ",
        "nan",
        "NaN",
        "NAN",
        "None",
        "JE ne sais pas",
        "je ne sais pas",
        "Je ne sais pas",
        "Unknow",
        "Unknown",
        "non pertinent",
        "Non pertinent",
        "NON PERTINENT",
    ]
    normalized = df.copy().replace(replace_tokens, np.nan)

    for col in normalized.select_dtypes(include="object").columns:
        cleaned = normalized[col].replace(replace_tokens, np.nan)
        normalized[col] = cleaned.astype("string").str.strip()
    return normalized
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _harmonize_id_column(df: pd.DataFrame, column: str, *, digits_only: bool = True) -> pd.DataFrame:
|
| 64 |
+
data = df.copy()
|
| 65 |
+
if column not in data.columns:
|
| 66 |
+
return data
|
| 67 |
+
|
| 68 |
+
if digits_only:
|
| 69 |
+
extracted = data[column].astype(str).str.extract(r"(\\d+)")
|
| 70 |
+
data[column] = pd.to_numeric(extracted[0], errors="coerce")
|
| 71 |
+
data[column] = pd.to_numeric(data[column], errors="coerce").astype("Int64")
|
| 72 |
+
return data
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _rename_column(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame:
|
| 76 |
+
if source not in df.columns:
|
| 77 |
+
return df
|
| 78 |
+
return df.rename(columns={source: target})
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _log_id_diagnostics(df: pd.DataFrame, *, name: str, col_id: str) -> None:
    """Log row count, unique-id count and duplicate count for one source."""
    if col_id not in df.columns:
        logger.warning("La colonne {} est absente du fichier {}.", col_id, name)
        return
    n_rows = len(df)
    n_unique = df[col_id].nunique(dropna=True)
    logger.info(
        "{name}: {total} lignes | {uniques} identifiants uniques | {duplicates} doublons",
        name=name,
        total=n_rows,
        uniques=n_unique,
        duplicates=n_rows - n_unique,
    )
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _persist_sql_trace(df_dict: dict[str, pd.DataFrame], settings: Settings) -> pd.DataFrame:
|
| 98 |
+
"""
|
| 99 |
+
Reproduire la fusion SQL décrite dans le notebook.
|
| 100 |
|
| 101 |
+
Chaque DataFrame est stocké dans une base SQLite éphémÚre pour
|
| 102 |
+
conserver une traçabilitĂ© de la requĂȘte exĂ©cutĂ©e.
|
| 103 |
+
"""
|
| 104 |
+
db_path = settings.db_file
|
| 105 |
+
sql_path = settings.sql_file
|
| 106 |
|
| 107 |
+
db_path.parent.mkdir(parents=True, exist_ok=True)
|
| 108 |
+
sql_path.parent.mkdir(parents=True, exist_ok=True)
|
| 109 |
|
| 110 |
+
if db_path.exists():
|
| 111 |
+
db_path.unlink()
|
| 112 |
+
|
| 113 |
+
query = f"""
|
| 114 |
+
SELECT *
|
| 115 |
+
FROM sirh
|
| 116 |
+
INNER JOIN evaluation USING ({settings.col_id})
|
| 117 |
+
INNER JOIN sond USING ({settings.col_id});
|
| 118 |
+
""".strip()
|
| 119 |
+
|
| 120 |
+
with db_path.open("wb") as _:
|
| 121 |
+
pass # just ensure the file exists for sqlite on some platforms
|
| 122 |
+
|
| 123 |
+
with sqlite3.connect(db_path) as conn:
|
| 124 |
+
for name, frame in df_dict.items():
|
| 125 |
+
frame.to_sql(name, conn, index=False, if_exists="replace")
|
| 126 |
+
merged = pd.read_sql_query(query, conn)
|
| 127 |
+
|
| 128 |
+
sql_path.write_text(query, encoding="utf-8")
|
| 129 |
+
return merged
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def build_dataset(settings: Settings) -> pd.DataFrame:
    """Load, clean, harmonize and merge the three raw sources.

    Each source (SIRH, evaluations, survey) is read defensively, its key
    column renamed/coerced to the shared id, then the three frames are
    merged through the SQL trace helper.

    Raises KeyError when the join key is missing from the merged result.
    """
    # SIRH export: id column is already named settings.col_id.
    sirh = clean_text_values(
        safe_read_csv(settings.path_sirh).pipe(
            _harmonize_id_column, settings.col_id, digits_only=True
        )
    )
    # Evaluation export: key arrives as "eval_number".
    evaluation = clean_text_values(
        safe_read_csv(settings.path_eval)
        .pipe(_rename_column, "eval_number", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )
    # Survey export: key arrives as "code_sondage".
    sond = clean_text_values(
        safe_read_csv(settings.path_sondage)
        .pipe(_rename_column, "code_sondage", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )

    # Log per-source row / unique-id / duplicate counts before merging.
    for name, frame in {"sirh": sirh, "evaluation": evaluation, "sond": sond}.items():
        _log_id_diagnostics(frame, name=name, col_id=settings.col_id)

    frames = {
        "sirh": sirh,
        "evaluation": evaluation,
        "sond": sond,
    }
    merged = _persist_sql_trace(frames, settings)

    # Sanity check: the join key must survive the SQL merge.
    missing_cols = [settings.col_id] if settings.col_id not in merged.columns else []
    if missing_cols:
        raise KeyError(
            f"La colonne {settings.col_id} est absente de la fusion finale. "
            "Vérifiez vos fichiers sources."
        )

    logger.success("Fusion réalisée: {} lignes / {} colonnes", *merged.shape)
    return merged
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def save_dataset(df: pd.DataFrame, output_path: Path) -> None:
    """Write *df* to CSV at *output_path*, creating parent directories as needed."""
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Fichier fusionné sauvegardé dans {}", output_path)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ---------------------------------------------------------------------------
|
| 178 |
+
# CLI
|
| 179 |
+
# ---------------------------------------------------------------------------
|
| 180 |
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin vers un fichier settings.yml personnalisé.",
    ),
    output_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--output",
        "-o",
        help="Chemin de sortie du dataset fusionné.",
    ),
):
    """Typer entry point reproducing the raw-data merge.

    Loads settings (optionally from a custom settings.yml), builds the
    merged dataset from the three raw sources and writes it to CSV.
    """
    # Truthiness check: an unset --settings option falls back to defaults.
    settings = load_settings(settings_path) if settings_path else load_settings()
    df = build_dataset(settings)
    save_dataset(df, output_path)
|
|
|
|
|
|
|
| 200 |
|
| 201 |
|
| 202 |
if __name__ == "__main__":
|