GitHub Actions committed on
Commit ·
1e83921
1
Parent(s): 4a7ca9a
🚀 Auto-deploy from GitHub Actions
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- README.md +5 -5
- hf_space/README.md +157 -323
- hf_space/hf_space/hf_space/hf_space/hf_space/Makefile +3 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +66 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +547 -17
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -3
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/static.yml +37 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +5 -12
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +13 -9
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +35 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +5 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +7 -27
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +2 -2
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +34 -18
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE +10 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile +85 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +178 -4
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -4
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +191 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +328 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +7 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +37 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +2 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +7 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py +17 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml +2 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py +32 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +29 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py +29 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +30 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +30 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py +29 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +53 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py +5 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +3 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py +52 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +188 -14
README.md
CHANGED
|
@@ -13,11 +13,11 @@ python_version: 3.11
|
|
| 13 |
|
| 14 |
# OCR Projet 05 — Prédiction d'attrition
|
| 15 |
|
| 16 |
-

|
| 17 |
-

|
| 18 |
-
[![project_license]
|
| 19 |
-
![MkDocs]
|
| 20 |
-
](https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml)
|
| 17 |
+
[](https://github.com/stephmnt/OCR_Projet05/releases)
|
| 18 |
+
[](https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE)
|
| 19 |
+
[](https://stephmnt.github.io/OCR_Projet05/)
|
| 20 |
+
[](https://cookiecutter-data-science.drivendata.org/)
|
| 21 |
|
| 22 |
Ce dépôt contient le projet OCR_Projet05. Il s'agit d'une application Gradio déployable sur Hugging Face Spaces, alimentée par un pipeline de préparation de données, un entraînement automatique et des services d'inférence orientés RH (prédiction de départ d'employés).
|
| 23 |
|
hf_space/README.md
CHANGED
|
@@ -11,395 +11,229 @@ short_description: Projet 05 formation Openclassrooms
|
|
| 11 |
python_version: 3.11
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
|
|
|
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
âââ LICENSE <- Open-source license if one is chosen
|
| 27 |
-
âââ Makefile <- Makefile with convenience commands like `make data` or `make train`
|
| 28 |
-
âââ README.md <- The top-level README for developers using this project.
|
| 29 |
-
âââ data
|
| 30 |
-
â âââ external <- Data from third party sources.
|
| 31 |
-
â âââ interim <- Intermediate data that has been transformed.
|
| 32 |
-
â âââ processed <- The final, canonical data sets for modeling.
|
| 33 |
-
â âââ raw <- The original, immutable data dump.
|
| 34 |
-
â
|
| 35 |
-
âââ docs <- A default mkdocs project; see www.mkdocs.org for details
|
| 36 |
-
â
|
| 37 |
-
âââ models <- Trained and serialized models, model predictions, or model summaries
|
| 38 |
-
â
|
| 39 |
-
âââ notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
|
| 40 |
-
â the creator's initials, and a short `-` delimited description, e.g.
|
| 41 |
-
â `1.0-jqp-initial-data-exploration`.
|
| 42 |
-
â
|
| 43 |
-
âââ pyproject.toml <- Project configuration file with package metadata for
|
| 44 |
-
â projet_05 and configuration for tools like black
|
| 45 |
-
â
|
| 46 |
-
âââ references <- Data dictionaries, manuals, and all other explanatory materials.
|
| 47 |
-
â
|
| 48 |
-
âââ reports <- Generated analysis as HTML, PDF, LaTeX, etc.
|
| 49 |
-
â âââ figures <- Generated graphics and figures to be used in reporting
|
| 50 |
-
â
|
| 51 |
-
âââ requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
| 52 |
-
â generated with `pip freeze > requirements.txt`
|
| 53 |
-
â
|
| 54 |
-
âââ setup.cfg <- Configuration file for flake8
|
| 55 |
-
â
|
| 56 |
-
âââ projet_05 <- Source code for use in this project.
|
| 57 |
-
â
|
| 58 |
-
âââ __init__.py <- Makes projet_05 a Python module
|
| 59 |
-
â
|
| 60 |
-
âââ config.py <- Store useful variables and configuration
|
| 61 |
-
â
|
| 62 |
-
âââ dataset.py <- Scripts to download or generate data
|
| 63 |
-
â
|
| 64 |
-
âââ features.py <- Code to create features for modeling
|
| 65 |
-
â
|
| 66 |
-
âââ modeling
|
| 67 |
-
â âââ __init__.py
|
| 68 |
-
â âââ predict.py <- Code to run model inference with trained models
|
| 69 |
-
â âââ train.py <- Code to train models
|
| 70 |
-
â
|
| 71 |
-
âââ plots.py <- Code to create visualizations
|
| 72 |
-
```
|
| 73 |
-
|
| 74 |
-
## Code hérité réutilisé
|
| 75 |
-
|
| 76 |
-
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 77 |
-
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 78 |
-
|
| 79 |
-
## Base de données PostgreSQL
|
| 80 |
-
|
| 81 |
-
Depuis la branche `postgresql`, toute la fusion des fichiers bruts repose sur une base PostgreSQL accessible via SQLAlchemy.
|
| 82 |
-
|
| 83 |
-
1. Installez PostgreSQL (Homebrew, package officiel, etc.).
|
| 84 |
-
2. Créez un rÎle et la base attendue :
|
| 85 |
-
|
| 86 |
-
> Exemple pour MacOS
|
| 87 |
-
|
| 88 |
-
```bash
|
| 89 |
-
/opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
|
| 90 |
-
/opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
|
| 91 |
-
/opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
|
| 92 |
-
```
|
| 93 |
-
|
| 94 |
-
Adaptez les chemins/versions selon votre environnement.
|
| 95 |
-
3. Renseignez la chaĂźne de connexion dans `projet_05/settings.yml` :
|
| 96 |
-
|
| 97 |
-
```yaml
|
| 98 |
-
database:
|
| 99 |
-
url: postgresql+psycopg://user:password@host:5432/projet05
|
| 100 |
-
schema: public
|
| 101 |
-
```
|
| 102 |
-
|
| 103 |
-
Il est également possible de définir `PROJET05_DATABASE_URL` dans l'environnement.
|
| 104 |
-
|
| 105 |
-
4. Initialisez la base (création des tables + insertion des CSV d'exemple) avec :
|
| 106 |
-
|
| 107 |
-
```bash
|
| 108 |
-
python -m scripts.init_db
|
| 109 |
-
```
|
| 110 |
-
|
| 111 |
-
5. Assurez-vous que l'utilisateur possÚde les droits `CREATE/DROP TABLE` dans le schéma ciblé : les tables `sirh`, `evaluation`, `sond` ainsi que `prediction_logs` seront créées ou recréées à chaque ré-exécution.
|
| 112 |
-
|
| 113 |
-
6. Lancez ensuite `python -m projet_05.dataset` comme auparavant (ou `python main.py` pour exĂ©cuter toutes les Ă©tapes). La requĂȘte SQL utilisĂ©e est toujours exportĂ©e dans `reports/merge_sql.sql` pour audit.
|
| 114 |
-
|
| 115 |
-
> Les interactions utilisateur/modÚle (qu'elles proviennent du formulaire, du tableau ou d'un upload) sont automatiquement journalisées dans la table `prediction_logs`, ce qui permet de tracer les usages et de constituer un dataset réel pour le monitoring.
|
| 116 |
-
|
| 117 |
-
## Tests & couverture
|
| 118 |
-
|
| 119 |
-
Une batterie de tests Pytest valide l'intégrité de la base PostgreSQL, la fusion des données et la journalisation des prédictions.
|
| 120 |
-
|
| 121 |
-
1. Démarrez PostgreSQL (cf. section précédente) et créez un utilisateur ayant les droits `CREATE/DROP DATABASE`.
|
| 122 |
-
2. Facultatif : définissez `PROJET05_TEST_DATABASE_URL` si vous souhaitez utiliser une URL différente de `postgresql+psycopg://postgres:postgres@localhost:5432/projet05_test`.
|
| 123 |
-
3. Exécutez les tests et générez le rapport de couverture :
|
| 124 |
-
|
| 125 |
-
```bash
|
| 126 |
-
pytest
|
| 127 |
-
```
|
| 128 |
-
|
| 129 |
-
La configuration Pytest produit Ă la fois un rapport terminal (`--cov-report=term-missing`) et un fichier `coverage.xml` exploitable par vos outils CI/CD.
|
| 130 |
-
Les sorties complÚtes sont sauvegardées dans `logs/tests_logs/<timestamp>.log`.
|
| 131 |
-
|
| 132 |
-
Les tests vérifient notamment :
|
| 133 |
-
|
| 134 |
-
- la création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` et la cohérence du nombre de lignes insérées ;
|
| 135 |
-
- lâintĂ©gritĂ© du DataFrame fusionnĂ© (typage, absence de valeurs nulles sur la clĂ© primaire, cohĂ©rence de la cible) ;
|
| 136 |
-
- la robustesse du script de log des prĂ©dictions (insertion dâentrĂ©es dans `prediction_logs` et nettoyage) ;
|
| 137 |
-
- la génération des logs de pipeline, regroupés dans `logs/pipeline_logs/<timestamp>.log`.
|
| 138 |
-
|
| 139 |
-
--------
|
| 140 |
|
| 141 |
-
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
***
|
| 147 |
-
*** that would make this better, please fork the repo and create a pull request
|
| 148 |
-
*** or simply open an issue with the tag "enhancement".
|
| 149 |
-
*** Don't forget to give the project a star!
|
| 150 |
-
*** Thanks again! Now go create something AMAZING! :D
|
| 151 |
-
-->
|
| 152 |
|
| 153 |
-
|
| 154 |
-
<!--
|
| 155 |
-
*** I'm using markdown "reference style" links for readability.
|
| 156 |
-
*** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
|
| 157 |
-
*** See the bottom of this document for the declaration of the reference variables
|
| 158 |
-
*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
|
| 159 |
-
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 160 |
-
-->
|
| 161 |
-
[![Contributors][contributors-shield]][contributors-url]
|
| 162 |
-
[![Python][python]][python]
|
| 163 |
-
[![Forks][forks-shield]][forks-url]
|
| 164 |
-
[![Stargazers][stars-shield]][stars-url]
|
| 165 |
-
[![Issues][issues-shield]][issues-url]
|
| 166 |
-
[![project_license][license-shield]][license-url]
|
| 167 |
-
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 168 |
-

|
| 169 |
-
|
| 170 |
-
<!-- PROJECT LOGO -->
|
| 171 |
-
<br />
|
| 172 |
-
<div align="center">
|
| 173 |
-
<a href="https://github.com/github_username/repo_name">
|
| 174 |
-
<img src="images/logo.png" alt="Logo" width="80" height="80">
|
| 175 |
-
</a>
|
| 176 |
-
|
| 177 |
-
<h3 align="center">project_title</h3>
|
| 178 |
-
|
| 179 |
-
<p align="center">
|
| 180 |
-
project_description
|
| 181 |
-
<br />
|
| 182 |
-
<a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
|
| 183 |
-
<br />
|
| 184 |
-
<br />
|
| 185 |
-
<a href="https://github.com/github_username/repo_name">View Demo</a>
|
| 186 |
-
·
|
| 187 |
-
<a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
|
| 188 |
-
·
|
| 189 |
-
<a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
|
| 190 |
-
</p>
|
| 191 |
-
</div>
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
</li>
|
| 203 |
-
<li>
|
| 204 |
-
<a href="#getting-started">Getting Started</a>
|
| 205 |
-
<ul>
|
| 206 |
-
<li><a href="#prerequisites">Prerequisites</a></li>
|
| 207 |
-
<li><a href="#installation">Installation</a></li>
|
| 208 |
-
</ul>
|
| 209 |
-
</li>
|
| 210 |
-
<li><a href="#usage">Usage</a></li>
|
| 211 |
-
<li><a href="#roadmap">Roadmap</a></li>
|
| 212 |
-
<li><a href="#contributing">Contributing</a></li>
|
| 213 |
-
<li><a href="#license">License</a></li>
|
| 214 |
-
<li><a href="#contact">Contact</a></li>
|
| 215 |
-
<li><a href="#acknowledgments">Acknowledgments</a></li>
|
| 216 |
-
</ol>
|
| 217 |
-
</details>
|
| 218 |
|
|
|
|
| 219 |
|
|
|
|
| 220 |
|
| 221 |
-
|
| 222 |
-
## About The Project
|
| 223 |
|
| 224 |
-
|
|
|
|
|
|
|
| 225 |
|
| 226 |
-
|
| 227 |
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
|
|
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
|
| 233 |
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
-
|
| 238 |
|
| 239 |
-
|
| 240 |
-
## Getting Started
|
| 241 |
|
| 242 |
-
|
| 243 |
-
To get a local copy up and running follow these simple example steps.
|
| 244 |
|
| 245 |
-
###
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
npm install npm@latest -g
|
| 251 |
-
```
|
| 252 |
|
| 253 |
-
|
| 254 |
|
| 255 |
-
|
| 256 |
-
uvicorn app.main:app --reload
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
```
|
| 262 |
-
2. Install NPM packages
|
| 263 |
-
```sh
|
| 264 |
-
npm install
|
| 265 |
-
```
|
| 266 |
-
3. Enter your API in `config.js`
|
| 267 |
-
```js
|
| 268 |
-
const API_KEY = 'ENTER YOUR API';
|
| 269 |
-
```
|
| 270 |
-
4. Change git remote url to avoid accidental pushes to base project
|
| 271 |
-
```sh
|
| 272 |
-
git remote set-url origin github_username/repo_name
|
| 273 |
-
git remote -v # confirm the changes
|
| 274 |
-
```
|
| 275 |
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
|
|
|
| 278 |
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
-
|
| 281 |
-
## Usage
|
| 282 |
|
| 283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
-
|
| 286 |
|
| 287 |
-
|
| 288 |
|
|
|
|
| 289 |
|
|
|
|
| 290 |
|
| 291 |
-
|
| 292 |
-
## Roadmap
|
| 293 |
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
|
| 299 |
-
|
| 300 |
|
| 301 |
-
|
|
|
|
| 302 |
|
|
|
|
| 303 |
|
|
|
|
| 304 |
|
| 305 |
-
|
| 306 |
-
## Contributing
|
| 307 |
|
| 308 |
-
|
|
|
|
|
|
|
| 309 |
|
| 310 |
-
|
| 311 |
-
Don't forget to give the project a star! Thanks again!
|
| 312 |
|
| 313 |
-
|
| 314 |
-
2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
|
| 315 |
-
3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
|
| 316 |
-
4. Push to the Branch (`git push origin feature/AmazingFeature`)
|
| 317 |
-
5. Open a Pull Request
|
| 318 |
|
| 319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
|
| 321 |
-
|
| 322 |
|
| 323 |
-
|
| 324 |
-
<img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
|
| 325 |
-
</a>
|
| 326 |
|
|
|
|
|
|
|
|
|
|
| 327 |
|
|
|
|
| 328 |
|
| 329 |
-
|
| 330 |
-
## License
|
| 331 |
|
| 332 |
-
|
| 333 |
|
| 334 |
-
|
|
|
|
|
|
|
| 335 |
|
|
|
|
|
|
|
| 336 |
|
|
|
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
|
| 341 |
-
|
| 342 |
|
| 343 |
-
|
| 344 |
|
| 345 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
|
|
|
| 347 |
|
|
|
|
| 348 |
|
| 349 |
-
|
| 350 |
-
## Acknowledgments
|
| 351 |
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
|
|
|
| 355 |
|
| 356 |
-
|
| 357 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
|
|
|
| 359 |
|
| 360 |
-
|
| 361 |
-
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 362 |
-
[contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 363 |
-
[contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
|
| 364 |
-
[forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 365 |
-
[forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
|
| 366 |
-
[stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 367 |
-
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 368 |
-
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 369 |
-
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
| 370 |
-
[product-screenshot]: images/screenshot.png
|
| 371 |
-
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 372 |
-
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
| 373 |
-
[Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
|
| 374 |
-
[Next-url]: https://nextjs.org/
|
| 375 |
-
[React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
|
| 376 |
-
[React-url]: https://reactjs.org/
|
| 377 |
-
[Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
|
| 378 |
-
[Vue-url]: https://vuejs.org/
|
| 379 |
-
[Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
|
| 380 |
-
[Angular-url]: https://angular.io/
|
| 381 |
-
[Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
|
| 382 |
-
[Svelte-url]: https://svelte.dev/
|
| 383 |
-
[Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
|
| 384 |
-
[Laravel-url]: https://laravel.com
|
| 385 |
-
[Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
|
| 386 |
-
[Bootstrap-url]: https://getbootstrap.com
|
| 387 |
-
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 388 |
-
[JQuery-url]: https://jquery.com
|
| 389 |
-
<!-- OK -->
|
| 390 |
-
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 391 |
-
[license-url]: https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE
|
| 392 |
-
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 393 |
-
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 394 |
-
<!-- TODO: -->
|
| 395 |
-
[postgres-shield]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
|
| 396 |
-
[python-shield]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
|
| 397 |
-
[mkdocs-shield]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
|
| 398 |
-
[mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
|
| 399 |
-
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 400 |
-
[](#)
|
| 401 |
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-

|
|
|
|
| 11 |
python_version: 3.11
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# OCR Projet 05 — Prédiction d'attrition
|
| 15 |
|
| 16 |
+
[](https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml)
|
| 17 |
+
[](https://github.com/stephmnt/OCR_Projet05/releases)
|
| 18 |
+
[](https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE)
|
| 19 |
+
[](https://stephmnt.github.io/OCR_Projet05/)
|
| 20 |
+
[](https://cookiecutter-data-science.drivendata.org/)
|
| 21 |
|
| 22 |
+
Ce dépôt contient le projet OCR_Projet05. Il s'agit d'une application Gradio déployable sur Hugging Face Spaces, alimentée par un pipeline de préparation de données, un entraînement automatique et des services d'inférence orientés RH (prédiction de départ d'employés).
|
| 23 |
|
| 24 |
+
Ce document décrit :
|
| 25 |
|
| 26 |
+
- la **présentation fonctionnelle** ;
|
| 27 |
+
- les **instructions d'installation, d'utilisation et de déploiement** (local + Hugging Face) ;
|
| 28 |
+
- le **processus de stockage/gestion des données** (PostgreSQL + journaux) ;
|
| 29 |
+
- les **besoins analytiques** (tableaux de bord, métriques clés).
|
| 30 |
|
| 31 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
## 1. Vue d'ensemble du projet
|
| 34 |
|
| 35 |
+
- **Objectif métier** : détecter les employés à risque de départ en exploitant 3 sources brutes (SIRH, évaluation, sondage interne).
|
| 36 |
+
- **Technologie** : pipeline Python (Typer, pandas, scikit-learn, SQLAlchemy) + application Gradio (`app.py`) déployée sur Hugging Face.
|
| 37 |
+
- **ModĂšle** : pipeline scikit-learn (prĂ©traitement + classifieur) sĂ©rialisĂ© dans `models/best_model.joblib`, paramĂ©trĂ© avec un seuil de dĂ©cision optimisĂ© (visible dans lâUI).
|
| 38 |
+
- **Journaux** : deux sous-dossiers `logs/pipeline_logs` et `logs/tests_logs` contiennent respectivement les traces du pipeline `main.py` et les sorties Pytest.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
Arborescence clé :
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
```
|
| 43 |
+
âââ projet_05/ # Package Python principal
|
| 44 |
+
âââ app.py # Interface Gradio (dĂ©ploiement HF)
|
| 45 |
+
âââ scripts/init_db.py # CrĂ©ation/initialisation PostgreSQL
|
| 46 |
+
âââ main.py # Orchestrateur du pipeline local
|
| 47 |
+
âââ docs/ # Documentation MkDocs + tests.md
|
| 48 |
+
âââ tests/ # Suite Pytest (DB + intĂ©gration)
|
| 49 |
+
âââ requirements.txt # DĂ©pendances runtime (HF)
|
| 50 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
---
|
| 53 |
|
| 54 |
+
## 2. Installation locale
|
| 55 |
|
| 56 |
+
### 2.1. Prérequis
|
|
|
|
| 57 |
|
| 58 |
+
1. Python 3.11 (virtualenv ou Poetry recommandé).
|
| 59 |
+
2. PostgreSQL (>= 17) accessible localement (cf. instructions DB plus bas).
|
| 60 |
+
3. Outils optionnels : `make`, `pip`, `pytest`.
|
| 61 |
|
| 62 |
+
### 2.2. Étapes
|
| 63 |
|
| 64 |
+
```bash
|
| 65 |
+
git clone https://github.com/stephmnt/OCR_Projet05.git
|
| 66 |
+
cd OCR_Projet05
|
| 67 |
+
python -m venv .venv && source .venv/bin/activate
|
| 68 |
+
pip install -r requirements.txt # pour HF
|
| 69 |
+
pip install -e . # pour le développement local (pyproject)
|
| 70 |
+
```
|
| 71 |
|
| 72 |
+
### 2.3. Configuration PostgreSQL
|
| 73 |
|
| 74 |
+
```bash
|
| 75 |
+
/opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
|
| 76 |
+
/opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
|
| 77 |
+
/opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
|
| 78 |
+
```
|
| 79 |
|
| 80 |
+
Puis dans `projet_05/settings.yml` :
|
| 81 |
|
| 82 |
+
```yaml
|
| 83 |
+
database:
|
| 84 |
+
url: postgresql+psycopg://postgres:postgres@localhost:5432/projet05
|
| 85 |
+
schema: public
|
| 86 |
+
```
|
| 87 |
|
| 88 |
+
> Sur une autre infrastructure, adaptez lâURL ou utilisez `PROJET05_DATABASE_URL`.
|
| 89 |
|
| 90 |
+
---
|
|
|
|
| 91 |
|
| 92 |
+
## 3. Utilisation du pipeline
|
|
|
|
| 93 |
|
| 94 |
+
### 3.1. Initialiser la base
|
| 95 |
|
| 96 |
+
```bash
|
| 97 |
+
python -m scripts.init_db
|
| 98 |
+
```
|
|
|
|
|
|
|
| 99 |
|
| 100 |
+
Création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` + insertion des CSV bruts situés dans `data/raw`.
|
| 101 |
|
| 102 |
+
### 3.2. Pipeline complet
|
|
|
|
| 103 |
|
| 104 |
+
```bash
|
| 105 |
+
python main.py
|
| 106 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
+
- `main.py` enchaĂźne :
|
| 109 |
+
1. Initialisation base PostgreSQL (`scripts.init_db`)
|
| 110 |
+
2. Préparation des données (`projet_05.dataset`)
|
| 111 |
+
3. Feature engineering (`projet_05.features`)
|
| 112 |
+
4. EntraĂźnement (`projet_05.modeling.train`)
|
| 113 |
+
- Les logs sont regroupés dans `logs/pipeline_logs/<timestamp>.log`.
|
| 114 |
|
| 115 |
+
### 3.3. Application Gradio locale
|
| 116 |
|
| 117 |
+
```bash
|
| 118 |
+
python app.py
|
| 119 |
+
```
|
| 120 |
|
| 121 |
+
L'interface propose :
|
|
|
|
| 122 |
|
| 123 |
+
- onglet **Formulaire** ;
|
| 124 |
+
- **Tableau interactif** ;
|
| 125 |
+
- **Upload CSV** ;
|
| 126 |
+
- **Fichiers non-mergés** (chargement des trois CSV bruts, fusion automatique).
|
| 127 |
+
Un appel à `_log_predictions` trace chaque prédiction dans PostgreSQL (`prediction_logs`).
|
| 128 |
|
| 129 |
+
---
|
| 130 |
|
| 131 |
+
## 4. Déploiement sur Hugging Face
|
| 132 |
|
| 133 |
+
### 4.1. Dépendances
|
| 134 |
|
| 135 |
+
`requirements.txt` contient toutes les bibliothĂšques nĂ©cessaires Ă la Space (Gradio, scikit-learn, pandas, SQLAlchemy, psycopgâŠ).
|
| 136 |
|
| 137 |
+
### 4.2. Ătapes
|
|
|
|
| 138 |
|
| 139 |
+
1. Créer une Space Gradio (Python 3.11).
|
| 140 |
+
2. Copier `app.py`, `requirements.txt`, `models/`, `data/processed/schema.json`.
|
| 141 |
+
3. Configurer les secrets HF (si besoin de variables dâenvironnement).
|
| 142 |
+
4. Optionnel : définir `HUGGINGFACEHUB_API_TOKEN` pour automatiser les déploiements via GitHub Actions.
|
| 143 |
|
| 144 |
+
### 4.3. Spécificités Space
|
| 145 |
|
| 146 |
+
- Hugging Face nâexpose pas PostgreSQL. Lâapplication Gradio bascule alors sur le mode **pandas fallback** (fusion locale) grĂące Ă la gestion dâerreur de `dataset.py`.
|
| 147 |
+
- Les journaux restants sont ceux gĂ©nĂ©rĂ©s par lâappli (pas dâĂ©criture dans `logs/` cĂŽtĂ© Space).
|
| 148 |
|
| 149 |
+
---
|
| 150 |
|
| 151 |
+
## 5. Processus de stockage & gestion des données
|
| 152 |
|
| 153 |
+
### 5.1. Sources
|
|
|
|
| 154 |
|
| 155 |
+
- `data/raw/extrait_sirh.csv`
|
| 156 |
+
- `data/raw/extrait_eval.csv`
|
| 157 |
+
- `data/raw/extrait_sondage.csv`
|
| 158 |
|
| 159 |
+
### 5.2. Base relationnelle
|
|
|
|
| 160 |
|
| 161 |
+
Tables PostgreSQL créées par `scripts.init_db` :
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
+
| Table | RÎle | Colonnes clés |
|
| 164 |
+
| --- | --- | --- |
|
| 165 |
+
| `sirh` | Profil RH structuré | `id_employee`, `age`, `revenu_mensuel`, `poste`, etc. |
|
| 166 |
+
| `evaluation` | Historique dâĂ©valuations | `id_employee`, `note_evaluation_actuelle`, `niveau_hierarchique_poste`, `satisfaction_*` |
|
| 167 |
+
| `sond` | Sondage + cible | `id_employee`, `a_quitte_l_entreprise`, `distance_domicile_travail`, `domaine_etude`, etc. |
|
| 168 |
+
| `prediction_logs` | Journal dâinfĂ©rence | `log_id`, `created_at`, `id_employee`, `source`, `probability`, `decision`, `payload` JSON |
|
| 169 |
|
| 170 |
+
`projet_05.dataset` fusionne `sirh â© evaluation â© sond` via SQL ; en cas dâindisponibilitĂ© DB, la fusion pandas est utilisĂ©e en repli.
|
| 171 |
|
| 172 |
+
### 5.3. Journaux et tracing
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
- `logs/pipeline_logs` : sorties `main.py`
|
| 175 |
+
- `logs/tests_logs` : sorties Pytest (`make test`)
|
| 176 |
+
- `prediction_logs` : base PostgreSQL, indispensable pour lâaudit des dĂ©cisions ML.
|
| 177 |
|
| 178 |
+
---
|
| 179 |
|
| 180 |
+
## 6. Tests et couverture
|
|
|
|
| 181 |
|
| 182 |
+
### 6.1. Exécution
|
| 183 |
|
| 184 |
+
```bash
|
| 185 |
+
pytest
|
| 186 |
+
```
|
| 187 |
|
| 188 |
+
- La fixture `initialized_db` crée une base `projet05_test`, lance `scripts.init_db`, puis la supprime.
|
| 189 |
+
- Les logs Pytest sont stockés dans `logs/tests_logs/<timestamp>.log`.
|
| 190 |
|
| 191 |
+
### 6.2. Couverture
|
| 192 |
|
| 193 |
+
- Rapports `term-missing` + `coverage.xml`.
|
| 194 |
+
- Zones non couvertes : `features.py`, `modeling/train.py`, `explainability.py` (Ă prioriser si besoin).
|
| 195 |
|
| 196 |
+
---
|
| 197 |
|
| 198 |
+
## 7. Besoins analytiques / tableaux de bord
|
| 199 |
|
| 200 |
+
- **Dashboard RH** basé sur les journaux `prediction_logs` :
|
| 201 |
+
- Volume de prédictions par source (Formulaire / CSV / Raw).
|
| 202 |
+
- Répartition des scores (`proba_depart`) / seuil de décision.
|
| 203 |
+
- Historique des décisions (tendance du taux de risque).
|
| 204 |
+
- Drill-down par attributs (`departement`, `poste`, `genre`âŠ).
|
| 205 |
+
- **Monitoring modĂšle** :
|
| 206 |
+
- Taux dâutilisation (logs journaliers).
|
| 207 |
+
- Drift potentiel : comparer les distributions des features avec `docs/` (notebooks dâanalyse) ou via un outil externe.
|
| 208 |
+
- **KPI Data/IT** :
|
| 209 |
+
- Latence dâinfĂ©rence (calculable via timestamps, si ajoutĂ©s).
|
| 210 |
+
- Suivi des erreurs (logs pipeline/tests).
|
| 211 |
|
| 212 |
+
---
|
| 213 |
|
| 214 |
+
## 8. Choix techniques et justification
|
| 215 |
|
| 216 |
+
Ce projet combine une interface Gradio, une base PostgreSQL et un pipeline CI/CD GitHub Actions. Les dĂ©cisions dâarchitecture dĂ©taillant le pourquoi/du comment (Gradio vs FastAPI, choix de PostgreSQL, automatisations) sont regroupĂ©es dans [`docs/docs/choix-techniques.md`](docs/docs/choix-techniques.md). Cette section sert de support de soutenance pour rappeler :
|
|
|
|
| 217 |
|
| 218 |
+
- pourquoi Gradio a été privilégié pour la démonstration Hugging Face ;
|
| 219 |
+
- comment PostgreSQL sécurise la fusion des trois sources et la journalisation ;
|
| 220 |
+
- en quoi les workflows GitHub Actions garantissent un déploiement fiable.
|
| 221 |
+
- comment les environnements sont configurés : `main.py` est exécuté en environnement **test** (base `projet05_test`, variables `PROJET05_TEST_DATABASE_URL`) pour valider le pipeline complet, tandis que `app.py` tourne en **production** (Space Hugging Face, variable `PROJET05_DATABASE_URL`/fallback pandas) afin de servir les utilisateurs finaux.
|
| 222 |
|
| 223 |
+
## 9. Instructions rapides
|
| 224 |
|
| 225 |
+
| Action | Commande |
|
| 226 |
+
| --- | --- |
|
| 227 |
+
| Init DB + pipeline complet | `python main.py` |
|
| 228 |
+
| Lancer Gradio local | `python app.py` |
|
| 229 |
+
| Initialiser la base seule | `python -m scripts.init_db` |
|
| 230 |
+
| Lancer les tests + logs | `make test` |
|
| 231 |
+
| Déployer sur Hugging Face | Pousser `app.py`, `requirements.txt`, `models/`, config Space |
|
| 232 |
|
| 233 |
+
---
|
| 234 |
|
| 235 |
+
## 10. Licence / références
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
+
Ce projet est fourni dans le cadre de la formation OpenClassrooms.
|
| 238 |
+
La documentation complémentaire est disponible dans `docs/` (MkDocs + `docs/docs/tests.md` pour les tests).
|
| 239 |
+
Pour toute question : [LinkedIn](https://linkedin.com/in/stephanemanet).
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/Makefile
CHANGED
|
@@ -43,7 +43,9 @@ format:
|
|
| 43 |
## Run tests
|
| 44 |
.PHONY: test
|
| 45 |
test:
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
## Set up Python interpreter environment
|
|
|
|
| 43 |
## Run tests
|
| 44 |
.PHONY: test
|
| 45 |
test:
|
| 46 |
+
@mkdir -p logs/tests_logs
|
| 47 |
+
@timestamp=$$(date +%Y%m%d_%H%M%S); \
|
| 48 |
+
pytest | tee logs/tests_logs/$$timestamp.log
|
| 49 |
|
| 50 |
|
| 51 |
## Set up Python interpreter environment
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -7,6 +7,7 @@ questions.md
|
|
| 7 |
/reports/
|
| 8 |
/data/
|
| 9 |
runtime.txt
|
|
|
|
| 10 |
|
| 11 |
# vim
|
| 12 |
*.swp
|
|
|
|
| 7 |
/reports/
|
| 8 |
/data/
|
| 9 |
runtime.txt
|
| 10 |
+
/logs/
|
| 11 |
|
| 12 |
# vim
|
| 13 |
*.swp
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -76,6 +76,66 @@ python_version: 3.11
|
|
| 76 |
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 77 |
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
--------
|
| 80 |
|
| 81 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
@@ -105,7 +165,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 105 |
[![Issues][issues-shield]][issues-url]
|
| 106 |
[![project_license][license-shield]][license-url]
|
| 107 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 108 |
-
](#)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 77 |
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 78 |
|
| 79 |
+
## Base de données PostgreSQL
|
| 80 |
+
|
| 81 |
+
Depuis la branche `postgresql`, toute la fusion des fichiers bruts repose sur une base PostgreSQL accessible via SQLAlchemy.
|
| 82 |
+
|
| 83 |
+
1. Installez PostgreSQL (Homebrew, package officiel, etc.).
|
| 84 |
+
2. Créez un rÎle et la base attendue :
|
| 85 |
+
|
| 86 |
+
> Exemple pour MacOS
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
/opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
|
| 90 |
+
/opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
|
| 91 |
+
/opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
Adaptez les chemins/versions selon votre environnement.
|
| 95 |
+
3. Renseignez la chaĂźne de connexion dans `projet_05/settings.yml` :
|
| 96 |
+
|
| 97 |
+
```yaml
|
| 98 |
+
database:
|
| 99 |
+
url: postgresql+psycopg://user:password@host:5432/projet05
|
| 100 |
+
schema: public
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
Il est également possible de définir `PROJET05_DATABASE_URL` dans l'environnement.
|
| 104 |
+
|
| 105 |
+
4. Initialisez la base (création des tables + insertion des CSV d'exemple) avec :
|
| 106 |
+
|
| 107 |
+
```bash
|
| 108 |
+
python -m scripts.init_db
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
5. Assurez-vous que l'utilisateur possÚde les droits `CREATE/DROP TABLE` dans le schéma ciblé : les tables `sirh`, `evaluation`, `sond` ainsi que `prediction_logs` seront créées ou recréées à chaque ré-exécution.
|
| 112 |
+
|
| 113 |
+
6. Lancez ensuite `python -m projet_05.dataset` comme auparavant (ou `python main.py` pour exĂ©cuter toutes les Ă©tapes). La requĂȘte SQL utilisĂ©e est toujours exportĂ©e dans `reports/merge_sql.sql` pour audit.
|
| 114 |
+
|
| 115 |
+
> Les interactions utilisateur/modÚle (qu'elles proviennent du formulaire, du tableau ou d'un upload) sont automatiquement journalisées dans la table `prediction_logs`, ce qui permet de tracer les usages et de constituer un dataset réel pour le monitoring.
|
| 116 |
+
|
| 117 |
+
## Tests & couverture
|
| 118 |
+
|
| 119 |
+
Une batterie de tests Pytest valident lâintĂ©gritĂ© de la base PostgreSQL, la fusion des donnĂ©es et la journalisation des prĂ©dictions.
|
| 120 |
+
|
| 121 |
+
1. Démarrez PostgreSQL (cf. section précédente) et créez un utilisateur ayant les droits `CREATE/DROP DATABASE`.
|
| 122 |
+
2. Facultatif : définissez `PROJET05_TEST_DATABASE_URL` si vous souhaitez utiliser une URL différente de `postgresql+psycopg://postgres:postgres@localhost:5432/projet05_test`.
|
| 123 |
+
3. Exécutez les tests et générez le rapport de couverture :
|
| 124 |
+
|
| 125 |
+
```bash
|
| 126 |
+
pytest
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
La configuration Pytest produit Ă la fois un rapport terminal (`--cov-report=term-missing`) et un fichier `coverage.xml` exploitable par vos outils CI/CD.
|
| 130 |
+
Les sorties complÚtes sont sauvegardées dans `logs/tests_logs/<timestamp>.log`.
|
| 131 |
+
|
| 132 |
+
Les tests vérifient notamment :
|
| 133 |
+
|
| 134 |
+
- la création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` et la cohérence du nombre de lignes insérées ;
|
| 135 |
+
- lâintĂ©gritĂ© du DataFrame fusionnĂ© (typage, absence de valeurs nulles sur la clĂ© primaire, cohĂ©rence de la cible) ;
|
| 136 |
+
- la robustesse du script de log des prĂ©dictions (insertion dâentrĂ©es dans `prediction_logs` et nettoyage) ;
|
| 137 |
+
- la génération des logs de pipeline, regroupés dans `logs/pipeline_logs/<timestamp>.log`.
|
| 138 |
+
|
| 139 |
--------
|
| 140 |
|
| 141 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 165 |
[![Issues][issues-shield]][issues-url]
|
| 166 |
[![project_license][license-shield]][license-url]
|
| 167 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 168 |
+

|
| 169 |
|
| 170 |
<!-- PROJECT LOGO -->
|
| 171 |
<br />
|
|
|
|
| 398 |
[mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
|
| 399 |
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 400 |
[](#)
|
| 401 |
+
|
| 402 |
+

|
| 403 |
+
[![https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff]][[mkdocs-url](https://stephmnt.github.io/OCR_Projet05/)]
|
| 404 |
+

|
| 405 |
+

|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -5,15 +5,265 @@ from pathlib import Path
|
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
import gradio as gr
|
|
|
|
| 8 |
import pandas as pd
|
| 9 |
from loguru import logger
|
|
|
|
|
|
|
| 10 |
|
| 11 |
from projet_05.branding import apply_brand_theme
|
| 12 |
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
|
|
|
|
|
|
|
| 13 |
|
| 14 |
MODEL_PATH = Path("models/best_model.joblib")
|
| 15 |
METADATA_PATH = Path("models/best_model_meta.json")
|
| 16 |
SCHEMA_PATH = Path("data/processed/schema.json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def _load_schema(path: Path) -> dict[str, Any]:
|
|
@@ -54,6 +304,28 @@ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
|
| 54 |
return []
|
| 55 |
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 58 |
"""Normalize any user input into a validated DataFrame.
|
| 59 |
|
|
@@ -79,6 +351,195 @@ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
|
| 79 |
return df
|
| 80 |
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
def _ensure_model():
|
| 83 |
"""Ensure that a pipeline has been loaded before inference."""
|
| 84 |
if PIPELINE is None:
|
|
@@ -90,46 +551,77 @@ def _ensure_model():
|
|
| 90 |
def score_table(table):
|
| 91 |
"""Score data entered via the interactive table."""
|
| 92 |
_ensure_model()
|
| 93 |
-
df = _convert_input(table,
|
|
|
|
|
|
|
| 94 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 95 |
-
|
| 96 |
df,
|
| 97 |
PIPELINE,
|
| 98 |
THRESHOLD,
|
| 99 |
drop_columns=drop_cols,
|
| 100 |
required_features=FEATURE_ORDER or None,
|
| 101 |
)
|
|
|
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def score_csv(upload):
|
|
|
|
| 105 |
"""Score a CSV uploaded by the user."""
|
| 106 |
_ensure_model()
|
| 107 |
if upload is None:
|
| 108 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
| 109 |
df = pd.read_csv(upload.name)
|
|
|
|
|
|
|
| 110 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 111 |
-
|
| 112 |
df,
|
| 113 |
PIPELINE,
|
| 114 |
THRESHOLD,
|
| 115 |
drop_columns=drop_cols,
|
| 116 |
required_features=FEATURE_ORDER or None,
|
| 117 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
def predict_from_form(*values):
|
| 121 |
"""Score a single row coming from the form tab."""
|
| 122 |
_ensure_model()
|
| 123 |
-
if not
|
| 124 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
| 125 |
-
payload = {feature: value for feature, value in zip(
|
| 126 |
df = pd.DataFrame([payload])
|
|
|
|
|
|
|
| 127 |
scored = run_inference(
|
| 128 |
df,
|
| 129 |
PIPELINE,
|
| 130 |
THRESHOLD,
|
| 131 |
required_features=FEATURE_ORDER or None,
|
| 132 |
)
|
|
|
|
| 133 |
row = scored.iloc[0]
|
| 134 |
label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
|
| 135 |
return {
|
|
@@ -157,11 +649,37 @@ except FileNotFoundError as exc:
|
|
| 157 |
logger.warning("Artéfact manquant: {}", exc)
|
| 158 |
|
| 159 |
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
| 162 |
-
gr.Markdown("#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
gr.Markdown(
|
| 164 |
-
"Le modĂšle
|
| 165 |
)
|
| 166 |
|
| 167 |
if PIPELINE is None:
|
|
@@ -172,26 +690,22 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 172 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
| 173 |
|
| 174 |
with gr.Tab("Formulaire unitaire"):
|
| 175 |
-
if not
|
| 176 |
gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
|
| 177 |
else:
|
| 178 |
form_inputs: list[gr.components.Component] = [] # type: ignore
|
| 179 |
-
for feature in
|
| 180 |
-
form_inputs.append(
|
| 181 |
-
gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
|
| 182 |
-
)
|
| 183 |
form_output = gr.JSON(label="Résultat")
|
| 184 |
gr.Button("Prédire").click(
|
| 185 |
fn=predict_from_form,
|
| 186 |
inputs=form_inputs,
|
| 187 |
outputs=form_output,
|
| 188 |
)
|
| 189 |
-
|
| 190 |
-
with gr.Tab("Tableau interactif"):
|
| 191 |
table_input = gr.Dataframe(
|
| 192 |
-
headers=
|
| 193 |
row_count=(1, "dynamic"),
|
| 194 |
-
col_count=(len(
|
| 195 |
type="pandas",
|
| 196 |
)
|
| 197 |
table_output = gr.Dataframe(label="Prédictions", type="pandas")
|
|
@@ -201,7 +715,8 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 201 |
outputs=table_output,
|
| 202 |
)
|
| 203 |
|
| 204 |
-
with gr.Tab("Fichier CSV"):
|
|
|
|
| 205 |
file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
|
| 206 |
file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
|
| 207 |
gr.Button("Scorer le fichier").click(
|
|
@@ -210,6 +725,21 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 210 |
outputs=file_output,
|
| 211 |
)
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
if __name__ == "__main__":
|
| 215 |
demo.launch()
|
|
|
|
| 5 |
from typing import Any
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
+
import numpy as np
|
| 9 |
import pandas as pd
|
| 10 |
from loguru import logger
|
| 11 |
+
from sqlalchemy import create_engine
|
| 12 |
+
from sqlalchemy.engine import Engine
|
| 13 |
|
| 14 |
from projet_05.branding import apply_brand_theme
|
| 15 |
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
|
| 16 |
+
from projet_05.settings import load_settings
|
| 17 |
+
from projet_05 import dataset as ds
|
| 18 |
|
| 19 |
MODEL_PATH = Path("models/best_model.joblib")
|
| 20 |
METADATA_PATH = Path("models/best_model_meta.json")
|
| 21 |
SCHEMA_PATH = Path("data/processed/schema.json")
|
| 22 |
+
DERIVED_FEATURES = {
|
| 23 |
+
"augmentation_par_revenu",
|
| 24 |
+
"annee_sur_poste_par_experience",
|
| 25 |
+
"nb_formation_par_experience",
|
| 26 |
+
"score_moyen_satisfaction",
|
| 27 |
+
"dern_promo_par_experience",
|
| 28 |
+
"evolution_note",
|
| 29 |
+
}
|
| 30 |
+
SATISFACTION_COLUMNS = [
|
| 31 |
+
"satisfaction_employee_environnement",
|
| 32 |
+
"satisfaction_employee_nature_travail",
|
| 33 |
+
"satisfaction_employee_equipe",
|
| 34 |
+
"satisfaction_employee_equilibre_pro_perso",
|
| 35 |
+
]
|
| 36 |
+
NUMERIC_CODE_COLUMNS = ["niveau_hierarchique_poste", "niveau_education"]
|
| 37 |
+
NUMERIC_FEATURES: set[str] = set()
|
| 38 |
+
CATEGORICAL_FEATURES: set[str] = set()
|
| 39 |
+
|
| 40 |
+
# Configuration manuelle des champs d'entrée (label + placeholder).
|
| 41 |
+
FIELD_UI_CONFIG = [
|
| 42 |
+
{"name": "age", "label": "Ăge", "placeholder": "Ăge en annĂ©es (ex : 35)"},
|
| 43 |
+
{"name": "revenu_mensuel", "label": "Revenu mensuel (âŹ)", "placeholder": "Montant mensuel en euros (ex : 4500)"},
|
| 44 |
+
{"name": "annees_dans_l_entreprise", "label": "Années dans l'entreprise", "placeholder": "Ancienneté totale (ex : 4.5)"},
|
| 45 |
+
{"name": "annees_dans_le_poste_actuel", "label": "Années sur le poste actuel", "placeholder": "Durée dans le poste (ex : 2)"},
|
| 46 |
+
{
|
| 47 |
+
"name": "annees_depuis_la_derniere_promotion",
|
| 48 |
+
"label": "Années depuis la derniÚre promotion",
|
| 49 |
+
"placeholder": "Durée depuis la derniÚre promotion (ex : 1)",
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"name": "distance_domicile_travail",
|
| 53 |
+
"label": "Distance domicile-travail (km)",
|
| 54 |
+
"placeholder": "Distance en kilomĂštres (ex : 12)",
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "nombre_participation_pee",
|
| 58 |
+
"label": "Nombre de participations PEE",
|
| 59 |
+
"placeholder": "Nombre de participations (entier)",
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"name": "note_evaluation_actuelle",
|
| 63 |
+
"label": "Note d'évaluation actuelle",
|
| 64 |
+
"placeholder": "Score actuel (1 Ă 5)",
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "note_evaluation_precedente",
|
| 68 |
+
"label": "Note d'évaluation précédente",
|
| 69 |
+
"placeholder": "Score précédent (1 à 5)",
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "annees_depuis_le_changement_deposte",
|
| 73 |
+
"label": "Années depuis le dernier changement de poste",
|
| 74 |
+
"placeholder": "Temps écoulé (ex : 0 si jamais)",
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "annee_experience_totale",
|
| 78 |
+
"label": "Années d'expérience totale",
|
| 79 |
+
"placeholder": "Expérience cumulative (ex : 8)",
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "nb_formations_suivies",
|
| 83 |
+
"label": "Nombre de formations suivies",
|
| 84 |
+
"placeholder": "Total des formations (entier)",
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"name": "satisfaction_employee_environnement",
|
| 88 |
+
"label": "Satisfaction environnement",
|
| 89 |
+
"placeholder": "Note de 1 (faible) Ă 5 (forte)",
|
| 90 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"name": "satisfaction_employee_nature_travail",
|
| 94 |
+
"label": "Satisfaction nature du travail",
|
| 95 |
+
"placeholder": "Note de 1 Ă 5",
|
| 96 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"name": "satisfaction_employee_equipe",
|
| 100 |
+
"label": "Satisfaction équipe",
|
| 101 |
+
"placeholder": "Note de 1 Ă 5",
|
| 102 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 103 |
+
},
|
| 104 |
+
{
|
| 105 |
+
"name": "satisfaction_employee_equilibre_pro_perso",
|
| 106 |
+
"label": "Satisfaction équilibre pro/perso",
|
| 107 |
+
"placeholder": "Note de 1 Ă 5",
|
| 108 |
+
"info": "Valeur comprise entre 1 et 5",
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"name": "genre",
|
| 112 |
+
"label": "Genre",
|
| 113 |
+
"component": "dropdown",
|
| 114 |
+
"choices": ["Femme", "Homme"],
|
| 115 |
+
"info": "Sélectionnez le genre",
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"name": "departement",
|
| 119 |
+
"label": "Département",
|
| 120 |
+
"component": "dropdown",
|
| 121 |
+
"choices": ["Commercial", "Consulting", "Ressources Humaines"],
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"name": "frequence_deplacement",
|
| 125 |
+
"label": "Fréquence des déplacements",
|
| 126 |
+
"component": "dropdown",
|
| 127 |
+
"choices": ["Aucun", "Occasionnel", "Frequent"],
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"name": "statut_marital",
|
| 131 |
+
"label": "Statut marital",
|
| 132 |
+
"component": "dropdown",
|
| 133 |
+
"choices": ["Célibataire", "Marié(e)", "Divorcé(e)"],
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"name": "poste",
|
| 137 |
+
"label": "Poste occupé",
|
| 138 |
+
"component": "dropdown",
|
| 139 |
+
"choices": [
|
| 140 |
+
"Cadre Commercial",
|
| 141 |
+
"Assistant de Direction",
|
| 142 |
+
"Consultant",
|
| 143 |
+
"Tech Lead",
|
| 144 |
+
"Manager",
|
| 145 |
+
"Senior Manager",
|
| 146 |
+
"Représentant Commercial",
|
| 147 |
+
"Directeur Technique",
|
| 148 |
+
"Ressources Humaines",
|
| 149 |
+
],
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"name": "niveau_hierarchique_poste",
|
| 153 |
+
"label": "Niveau hiérarchique",
|
| 154 |
+
"component": "dropdown",
|
| 155 |
+
"choices": [
|
| 156 |
+
"1, junior",
|
| 157 |
+
"2",
|
| 158 |
+
"3",
|
| 159 |
+
"4",
|
| 160 |
+
"5, senior",
|
| 161 |
+
],
|
| 162 |
+
"info": "Valeur numérique issue du SIRH (1 à 5)",
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"name": "niveau_education",
|
| 166 |
+
"label": "Niveau d'études",
|
| 167 |
+
"component": "dropdown",
|
| 168 |
+
"choices": [
|
| 169 |
+
"1, licence",
|
| 170 |
+
"2",
|
| 171 |
+
"3",
|
| 172 |
+
"4",
|
| 173 |
+
"5, master",
|
| 174 |
+
],
|
| 175 |
+
"info": "Indice numérique (1 à 5) figurant dans les exports bruts",
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "domaine_etude",
|
| 179 |
+
"label": "Domaine d'étude",
|
| 180 |
+
"component": "dropdown",
|
| 181 |
+
"choices": ["Entrepreunariat", "Infra & Cloud", "Marketing", "Ressources Humaines", "Transformation Digitale"],
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "heure_supplementaires",
|
| 185 |
+
"label": "Heures supplémentaires",
|
| 186 |
+
"component": "dropdown",
|
| 187 |
+
"choices": ["Oui", "Non"],
|
| 188 |
+
},
|
| 189 |
+
]
|
| 190 |
+
FIELD_UI_LOOKUP = {cfg["name"]: cfg for cfg in FIELD_UI_CONFIG}
|
| 191 |
+
try:
|
| 192 |
+
SETTINGS = load_settings()
|
| 193 |
+
except Exception: # pragma: no cover - remains optional when config absent
|
| 194 |
+
SETTINGS = None
|
| 195 |
+
CACHED_ENGINE: Engine | None = None
|
| 196 |
+
CATEGORICAL_NORMALIZERS: dict[str, dict[str, str]] = {
|
| 197 |
+
"genre": {
|
| 198 |
+
"f": "F",
|
| 199 |
+
"femme": "F",
|
| 200 |
+
"m": "M",
|
| 201 |
+
"homme": "M",
|
| 202 |
+
},
|
| 203 |
+
"statut_marital": {
|
| 204 |
+
"célibataire": "Célibataire",
|
| 205 |
+
"celibataire": "Célibataire",
|
| 206 |
+
"marié(e)": "Marié(e)",
|
| 207 |
+
"marie(e)": "Marié(e)",
|
| 208 |
+
"marie": "Marié(e)",
|
| 209 |
+
"marié": "Marié(e)",
|
| 210 |
+
"divorcé(e)": "Divorcé(e)",
|
| 211 |
+
"divorce(e)": "Divorcé(e)",
|
| 212 |
+
},
|
| 213 |
+
"departement": {
|
| 214 |
+
"commercial": "Commercial",
|
| 215 |
+
"consulting": "Consulting",
|
| 216 |
+
"ressources humaines": "Ressources Humaines",
|
| 217 |
+
},
|
| 218 |
+
"poste": {
|
| 219 |
+
"cadre commercial": "Cadre Commercial",
|
| 220 |
+
"assistant de direction": "Assistant de Direction",
|
| 221 |
+
"consultant": "Consultant",
|
| 222 |
+
"tech lead": "Tech Lead",
|
| 223 |
+
"manager": "Manager",
|
| 224 |
+
"senior manager": "Senior Manager",
|
| 225 |
+
"représentant commercial": "Représentant Commercial",
|
| 226 |
+
"representant commercial": "Représentant Commercial",
|
| 227 |
+
"directeur technique": "Directeur Technique",
|
| 228 |
+
"ressources humaines": "Ressources Humaines",
|
| 229 |
+
},
|
| 230 |
+
"frequence_deplacement": {
|
| 231 |
+
"aucun": "Aucun",
|
| 232 |
+
"aucune": "Aucun",
|
| 233 |
+
"occasionnel": "Occasionnel",
|
| 234 |
+
"occasionnelle": "Occasionnel",
|
| 235 |
+
"frequent": "Frequent",
|
| 236 |
+
"fréquent": "Frequent",
|
| 237 |
+
},
|
| 238 |
+
"domaine_etude": {
|
| 239 |
+
"entrepreunariat": "Entrepreunariat",
|
| 240 |
+
"infra & cloud": "Infra & Cloud",
|
| 241 |
+
"infra et cloud": "Infra & Cloud",
|
| 242 |
+
"marketing": "Marketing",
|
| 243 |
+
"ressources humaines": "Ressources Humaines",
|
| 244 |
+
"transformation digitale": "Transformation Digitale",
|
| 245 |
+
},
|
| 246 |
+
"heure_supplementaires": {
|
| 247 |
+
"oui": "Oui",
|
| 248 |
+
"o": "Oui",
|
| 249 |
+
"y": "Oui",
|
| 250 |
+
"non": "Non",
|
| 251 |
+
"n": "Non",
|
| 252 |
+
},
|
| 253 |
+
"niveau_hierarchique_poste": {
|
| 254 |
+
"junior": "Junior",
|
| 255 |
+
"confirmé": "Confirmé",
|
| 256 |
+
"confirme": "Confirmé",
|
| 257 |
+
"direction": "Direction",
|
| 258 |
+
"senior": "Senior",
|
| 259 |
+
},
|
| 260 |
+
"niveau_education": {
|
| 261 |
+
"licence": "Licence",
|
| 262 |
+
"master": "Master",
|
| 263 |
+
"doctorat": "Doctorat",
|
| 264 |
+
"bts": "BTS",
|
| 265 |
+
},
|
| 266 |
+
}
|
| 267 |
|
| 268 |
|
| 269 |
def _load_schema(path: Path) -> dict[str, Any]:
|
|
|
|
| 304 |
return []
|
| 305 |
|
| 306 |
|
| 307 |
+
def _ensure_settings():
|
| 308 |
+
"""Ensure configuration settings are available for data fusion."""
|
| 309 |
+
|
| 310 |
+
if SETTINGS is None:
|
| 311 |
+
raise gr.Error(
|
| 312 |
+
"Configuration introuvable. Placez `projet_05/settings.yml` dans le dépÎt ou renseignez PROJET05_SETTINGS."
|
| 313 |
+
)
|
| 314 |
+
return SETTINGS
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def _get_db_engine(settings: Settings) -> Engine: # pyright: ignore[reportUndefinedVariable]
|
| 318 |
+
global CACHED_ENGINE
|
| 319 |
+
if CACHED_ENGINE is not None:
|
| 320 |
+
return CACHED_ENGINE
|
| 321 |
+
if not settings.db_url:
|
| 322 |
+
raise RuntimeError(
|
| 323 |
+
"Aucune URL de base de données n'a été fournie. Configurez `database.url` dans settings.yml."
|
| 324 |
+
)
|
| 325 |
+
CACHED_ENGINE = create_engine(settings.db_url, future=True)
|
| 326 |
+
return CACHED_ENGINE
|
| 327 |
+
|
| 328 |
+
|
| 329 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 330 |
"""Normalize any user input into a validated DataFrame.
|
| 331 |
|
|
|
|
| 351 |
return df
|
| 352 |
|
| 353 |
|
| 354 |
+
def _read_uploaded_csv(upload, label: str) -> pd.DataFrame:
|
| 355 |
+
"""Load an uploaded CSV file or raise a user-friendly error."""
|
| 356 |
+
|
| 357 |
+
if upload is None:
|
| 358 |
+
raise gr.Error(f"Veuillez déposer le fichier {label}.")
|
| 359 |
+
try:
|
| 360 |
+
return pd.read_csv(upload.name)
|
| 361 |
+
except Exception as exc: # pragma: no cover - delegated to pandas
|
| 362 |
+
raise gr.Error(f"Impossible de lire le fichier {label}: {exc}") from exc
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
def _resolve_field_ui(feature: str) -> tuple[str, str, str | None, str, dict[str, Any]]:
|
| 366 |
+
"""Return UI metadata (label, placeholder, info, component type, config)."""
|
| 367 |
+
|
| 368 |
+
config = FIELD_UI_LOOKUP.get(feature, {})
|
| 369 |
+
label = config.get("label") or feature.replace("_", " ").capitalize()
|
| 370 |
+
placeholder = config.get("placeholder") or f"Saisir {label.lower()}"
|
| 371 |
+
info = config.get("info")
|
| 372 |
+
component = config.get("component", "textbox")
|
| 373 |
+
return label, placeholder, info, component, config
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def _build_input_component(feature: str) -> gr.components.Component: # type: ignore
|
| 377 |
+
"""Instantiate the appropriate Gradio component for a feature."""
|
| 378 |
+
|
| 379 |
+
label, placeholder, info, component, config = _resolve_field_ui(feature)
|
| 380 |
+
if component == "dropdown":
|
| 381 |
+
choices = config.get("choices") or []
|
| 382 |
+
default = config.get("default")
|
| 383 |
+
allow_custom = config.get("allow_custom_value", False)
|
| 384 |
+
return gr.Dropdown(
|
| 385 |
+
label=label,
|
| 386 |
+
choices=choices,
|
| 387 |
+
value=default,
|
| 388 |
+
info=info,
|
| 389 |
+
allow_custom_value=allow_custom,
|
| 390 |
+
)
|
| 391 |
+
return gr.Textbox(label=label, placeholder=placeholder, info=info)
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def _normalize_categorical_values(df: pd.DataFrame) -> pd.DataFrame:
|
| 395 |
+
"""Normalize friendly categorical values into the codes used by the model."""
|
| 396 |
+
|
| 397 |
+
normalized = df.copy()
|
| 398 |
+
|
| 399 |
+
def _normalize_value(value, mapping: dict[str, str]):
|
| 400 |
+
if pd.isna(value):
|
| 401 |
+
return value
|
| 402 |
+
if isinstance(value, str):
|
| 403 |
+
cleaned = value.strip()
|
| 404 |
+
lowered = cleaned.lower()
|
| 405 |
+
return mapping.get(lowered, cleaned)
|
| 406 |
+
return mapping.get(value, value)
|
| 407 |
+
|
| 408 |
+
for column, mapping in CATEGORICAL_NORMALIZERS.items():
|
| 409 |
+
if column not in normalized.columns:
|
| 410 |
+
continue
|
| 411 |
+
normalized[column] = normalized[column].apply(lambda v, m=mapping: _normalize_value(v, m))
|
| 412 |
+
for column in NUMERIC_CODE_COLUMNS:
|
| 413 |
+
if column in normalized.columns:
|
| 414 |
+
extracted = (
|
| 415 |
+
normalized[column]
|
| 416 |
+
.astype(str)
|
| 417 |
+
.str.extract(r"(-?\d+(?:[.,]\d+)?)")[0]
|
| 418 |
+
.str.replace(",", ".", regex=False)
|
| 419 |
+
)
|
| 420 |
+
normalized[column] = pd.to_numeric(extracted, errors="coerce")
|
| 421 |
+
numeric_targets = [col for col in NUMERIC_FEATURES.union(DERIVED_FEATURES).union(NUMERIC_CODE_COLUMNS) if col in normalized.columns]
|
| 422 |
+
for column in numeric_targets:
|
| 423 |
+
normalized[column] = pd.to_numeric(normalized[column], errors="coerce")
|
| 424 |
+
return normalized
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def _apply_derived_features(df: pd.DataFrame) -> pd.DataFrame:
|
| 428 |
+
"""Recompute engineered ratios so end-users do not have to provide them."""
|
| 429 |
+
|
| 430 |
+
enriched = _normalize_categorical_values(df)
|
| 431 |
+
|
| 432 |
+
def _safe_ratio(numerator: str, denominator: str, output: str) -> None:
|
| 433 |
+
if numerator not in enriched.columns or denominator not in enriched.columns:
|
| 434 |
+
return
|
| 435 |
+
numerator_series = pd.to_numeric(enriched[numerator], errors="coerce")
|
| 436 |
+
denominator_series = pd.to_numeric(enriched[denominator], errors="coerce").replace(0, pd.NA)
|
| 437 |
+
enriched[output] = numerator_series / denominator_series
|
| 438 |
+
|
| 439 |
+
prev_raise_col = "augementation_salaire_precedente"
|
| 440 |
+
if prev_raise_col in enriched:
|
| 441 |
+
normalized = (
|
| 442 |
+
enriched[prev_raise_col]
|
| 443 |
+
.astype(str)
|
| 444 |
+
.str.replace("%", "", regex=False)
|
| 445 |
+
.str.replace(",", ".", regex=False)
|
| 446 |
+
.str.strip()
|
| 447 |
+
)
|
| 448 |
+
enriched[prev_raise_col] = pd.to_numeric(normalized, errors="coerce") / 100
|
| 449 |
+
|
| 450 |
+
_safe_ratio("augementation_salaire_precedente", "revenu_mensuel", "augmentation_par_revenu")
|
| 451 |
+
_safe_ratio("annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
|
| 452 |
+
_safe_ratio("nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
|
| 453 |
+
_safe_ratio("annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience")
|
| 454 |
+
|
| 455 |
+
existing_sats = [col for col in SATISFACTION_COLUMNS if col in enriched.columns]
|
| 456 |
+
if existing_sats:
|
| 457 |
+
enriched["score_moyen_satisfaction"] = pd.DataFrame(
|
| 458 |
+
{col: pd.to_numeric(enriched[col], errors="coerce") for col in existing_sats}
|
| 459 |
+
).mean(axis=1)
|
| 460 |
+
|
| 461 |
+
if {"note_evaluation_actuelle", "note_evaluation_precedente"}.issubset(enriched.columns):
|
| 462 |
+
enriched["evolution_note"] = pd.to_numeric(
|
| 463 |
+
enriched["note_evaluation_actuelle"], errors="coerce"
|
| 464 |
+
) - pd.to_numeric(enriched["note_evaluation_precedente"], errors="coerce")
|
| 465 |
+
|
| 466 |
+
return enriched.replace({pd.NA: np.nan})
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
def _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload) -> pd.DataFrame:
|
| 470 |
+
"""Merge raw SIRH / evaluation / sondage CSVs uploaded by the user."""
|
| 471 |
+
|
| 472 |
+
settings = _ensure_settings()
|
| 473 |
+
sirh = ds.clean_text_values(_read_uploaded_csv(sirh_upload, "SIRH")).pipe(
|
| 474 |
+
ds._harmonize_id_column, settings.col_id, digits_only=True
|
| 475 |
+
)
|
| 476 |
+
evaluation = (
|
| 477 |
+
ds.clean_text_values(_read_uploaded_csv(evaluation_upload, "évaluation"))
|
| 478 |
+
.pipe(ds._rename_column, "eval_number", settings.col_id)
|
| 479 |
+
.pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)
|
| 480 |
+
)
|
| 481 |
+
sond = (
|
| 482 |
+
ds.clean_text_values(_read_uploaded_csv(sond_upload, "sondage"))
|
| 483 |
+
.pipe(ds._rename_column, "code_sondage", settings.col_id)
|
| 484 |
+
.pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)
|
| 485 |
+
)
|
| 486 |
+
|
| 487 |
+
for label, frame in {"SIRH": sirh, "évaluation": evaluation, "sondage": sond}.items():
|
| 488 |
+
if frame.empty:
|
| 489 |
+
raise gr.Error(f"Le fichier {label} est vide ou invalide.")
|
| 490 |
+
if settings.col_id not in frame.columns:
|
| 491 |
+
raise gr.Error(f"La colonne {settings.col_id} est absente du fichier {label}.")
|
| 492 |
+
|
| 493 |
+
merged = sirh.merge(evaluation, on=settings.col_id, how="inner").merge(sond, on=settings.col_id, how="inner")
|
| 494 |
+
if merged.empty:
|
| 495 |
+
raise gr.Error("Aucune ligne résultante aprÚs fusion des trois fichiers (jointure INNER vide).")
|
| 496 |
+
return merged
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
def _log_predictions(source: str, raw_inputs: pd.DataFrame, scored: pd.DataFrame) -> None:
|
| 500 |
+
"""Persist user interactions with the ML model into PostgreSQL."""
|
| 501 |
+
|
| 502 |
+
if SETTINGS is None or not SETTINGS.db_url:
|
| 503 |
+
return
|
| 504 |
+
settings = _ensure_settings()
|
| 505 |
+
try:
|
| 506 |
+
engine = _get_db_engine(settings)
|
| 507 |
+
except Exception as exc: # pragma: no cover - logging best effort
|
| 508 |
+
logger.error("Connexion impossible pour logger les interactions: {}", exc)
|
| 509 |
+
return
|
| 510 |
+
|
| 511 |
+
payload = raw_inputs.reindex(scored.index).fillna(value=pd.NA)
|
| 512 |
+
col_id = settings.col_id
|
| 513 |
+
records = []
|
| 514 |
+
for idx, row in scored.iterrows():
|
| 515 |
+
original = payload.loc[idx].to_dict() if idx in payload.index else {} # type: ignore
|
| 516 |
+
records.append(
|
| 517 |
+
{
|
| 518 |
+
"id_employee": row.get(col_id),
|
| 519 |
+
"probability": float(row.get("proba_depart", 0.0)),
|
| 520 |
+
"decision": int(row.get("prediction", 0)),
|
| 521 |
+
"threshold": THRESHOLD,
|
| 522 |
+
"source": source,
|
| 523 |
+
"payload": json.dumps(original, ensure_ascii=False, default=str),
|
| 524 |
+
}
|
| 525 |
+
)
|
| 526 |
+
|
| 527 |
+
if not records:
|
| 528 |
+
return
|
| 529 |
+
|
| 530 |
+
try:
|
| 531 |
+
pd.DataFrame(records).to_sql(
|
| 532 |
+
"prediction_logs",
|
| 533 |
+
engine,
|
| 534 |
+
schema=settings.db_schema,
|
| 535 |
+
if_exists="append",
|
| 536 |
+
index=False,
|
| 537 |
+
method="multi",
|
| 538 |
+
)
|
| 539 |
+
except Exception as exc: # pragma: no cover - logging best effort
|
| 540 |
+
logger.error("Impossible de journaliser les interactions: {}", exc)
|
| 541 |
+
|
| 542 |
+
|
| 543 |
def _ensure_model():
|
| 544 |
"""Ensure that a pipeline has been loaded before inference."""
|
| 545 |
if PIPELINE is None:
|
|
|
|
| 551 |
def score_table(table):
|
| 552 |
"""Score data entered via the interactive table."""
|
| 553 |
_ensure_model()
|
| 554 |
+
df = _convert_input(table, INPUT_FEATURES)
|
| 555 |
+
original = df.copy()
|
| 556 |
+
df = _apply_derived_features(df)
|
| 557 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 558 |
+
scored = run_inference(
|
| 559 |
df,
|
| 560 |
PIPELINE,
|
| 561 |
THRESHOLD,
|
| 562 |
drop_columns=drop_cols,
|
| 563 |
required_features=FEATURE_ORDER or None,
|
| 564 |
)
|
| 565 |
+
_log_predictions("interactive_table", original, scored)
|
| 566 |
+
return scored
|
| 567 |
|
| 568 |
|
| 569 |
def score_csv(upload):
|
| 570 |
+
|
| 571 |
"""Score a CSV uploaded by the user."""
|
| 572 |
_ensure_model()
|
| 573 |
if upload is None:
|
| 574 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
| 575 |
df = pd.read_csv(upload.name)
|
| 576 |
+
original = df.copy()
|
| 577 |
+
df = _apply_derived_features(df)
|
| 578 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 579 |
+
scored = run_inference(
|
| 580 |
df,
|
| 581 |
PIPELINE,
|
| 582 |
THRESHOLD,
|
| 583 |
drop_columns=drop_cols,
|
| 584 |
required_features=FEATURE_ORDER or None,
|
| 585 |
)
|
| 586 |
+
_log_predictions("csv_file", original, scored)
|
| 587 |
+
return scored
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
def score_raw_files(sirh_upload, evaluation_upload, sond_upload):
|
| 591 |
+
"""Score three raw CSVs (SIRH, évaluation, sondage) after merging them."""
|
| 592 |
+
|
| 593 |
+
_ensure_model()
|
| 594 |
+
merged = _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload)
|
| 595 |
+
original = merged.copy()
|
| 596 |
+
df = _apply_derived_features(merged)
|
| 597 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 598 |
+
scored = run_inference(
|
| 599 |
+
df,
|
| 600 |
+
PIPELINE,
|
| 601 |
+
THRESHOLD,
|
| 602 |
+
drop_columns=drop_cols,
|
| 603 |
+
required_features=FEATURE_ORDER or None,
|
| 604 |
+
)
|
| 605 |
+
_log_predictions("raw_files", original, scored)
|
| 606 |
+
return scored
|
| 607 |
|
| 608 |
|
| 609 |
def predict_from_form(*values):
|
| 610 |
"""Score a single row coming from the form tab."""
|
| 611 |
_ensure_model()
|
| 612 |
+
if not INPUT_FEATURES:
|
| 613 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
| 614 |
+
payload = {feature: value for feature, value in zip(INPUT_FEATURES, values)}
|
| 615 |
df = pd.DataFrame([payload])
|
| 616 |
+
original = df.copy()
|
| 617 |
+
df = _apply_derived_features(df)
|
| 618 |
scored = run_inference(
|
| 619 |
df,
|
| 620 |
PIPELINE,
|
| 621 |
THRESHOLD,
|
| 622 |
required_features=FEATURE_ORDER or None,
|
| 623 |
)
|
| 624 |
+
_log_predictions("form", original, scored)
|
| 625 |
row = scored.iloc[0]
|
| 626 |
label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
|
| 627 |
return {
|
|
|
|
| 649 |
logger.warning("Artéfact manquant: {}", exc)
|
| 650 |
|
| 651 |
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
|
| 652 |
+
INPUT_FEATURES = [feature for feature in FEATURE_ORDER if feature not in DERIVED_FEATURES]
|
| 653 |
+
if not INPUT_FEATURES:
|
| 654 |
+
INPUT_FEATURES = FEATURE_ORDER
|
| 655 |
+
numeric_from_schema = set(SCHEMA.get("numerical_features", []))
|
| 656 |
+
categorical_from_schema = set(SCHEMA.get("categorical_features", []))
|
| 657 |
+
if not numeric_from_schema:
|
| 658 |
+
numeric_from_schema = set((METADATA.get("features", {}).get("numerical") or []))
|
| 659 |
+
if not categorical_from_schema:
|
| 660 |
+
categorical_from_schema = set((METADATA.get("features", {}).get("categorical") or []))
|
| 661 |
+
NUMERIC_FEATURES = numeric_from_schema
|
| 662 |
+
CATEGORICAL_FEATURES = categorical_from_schema
|
| 663 |
|
| 664 |
with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
| 665 |
+
gr.Markdown("# OCR Projet 5 â PrĂ©diction de dĂ©part employĂ©")
|
| 666 |
+
gr.HTML(
|
| 667 |
+
"""
|
| 668 |
+
<div style="display:flex; gap:0.5rem; flex-wrap:wrap;">
|
| 669 |
+
<a href="https://github.com/stephmnt/OCR_Projet05/releases" target="_blank" rel="noreferrer">
|
| 670 |
+
<img src="https://img.shields.io/github/v/release/stephmnt/OCR_Projet05" alt="GitHub Release" />
|
| 671 |
+
</a>
|
| 672 |
+
<a href="https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml" target="_blank" rel="noreferrer">
|
| 673 |
+
<img src="https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml" alt="GitHub Actions Workflow Status" />
|
| 674 |
+
</a>
|
| 675 |
+
<a href="https://stephmnt.github.io/OCR_Projet05" target="_blank" rel="noreferrer">
|
| 676 |
+
<img src="https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff" alt="MkDocs" />
|
| 677 |
+
</a>
|
| 678 |
+
</div>
|
| 679 |
+
"""
|
| 680 |
+
)
|
| 681 |
gr.Markdown(
|
| 682 |
+
"Le modÚle fournit une probabilité de départ ainsi qu'une décision binaire."
|
| 683 |
)
|
| 684 |
|
| 685 |
if PIPELINE is None:
|
|
|
|
| 690 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
| 691 |
|
| 692 |
with gr.Tab("Formulaire unitaire"):
|
| 693 |
+
if not INPUT_FEATURES:
|
| 694 |
gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
|
| 695 |
else:
|
| 696 |
form_inputs: list[gr.components.Component] = [] # type: ignore
|
| 697 |
+
for feature in INPUT_FEATURES:
|
| 698 |
+
form_inputs.append(_build_input_component(feature))
|
|
|
|
|
|
|
| 699 |
form_output = gr.JSON(label="Résultat")
|
| 700 |
gr.Button("Prédire").click(
|
| 701 |
fn=predict_from_form,
|
| 702 |
inputs=form_inputs,
|
| 703 |
outputs=form_output,
|
| 704 |
)
|
|
|
|
|
|
|
| 705 |
table_input = gr.Dataframe(
|
| 706 |
+
headers=INPUT_FEATURES if INPUT_FEATURES else None,
|
| 707 |
row_count=(1, "dynamic"),
|
| 708 |
+
col_count=(len(INPUT_FEATURES), "dynamic") if INPUT_FEATURES else (5, "dynamic"),
|
| 709 |
type="pandas",
|
| 710 |
)
|
| 711 |
table_output = gr.Dataframe(label="Prédictions", type="pandas")
|
|
|
|
| 715 |
outputs=table_output,
|
| 716 |
)
|
| 717 |
|
| 718 |
+
with gr.Tab("Fichier CSV fusionné"):
|
| 719 |
+
gr.Markdown("Un exemple de fichier à importer est disponible dans le dépÎt github : [`references/sample_employees.csv`](https://github.com/stephmnt/OCR_Projet05/blob/main/references/sample_employees.csv)")
|
| 720 |
file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
|
| 721 |
file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
|
| 722 |
gr.Button("Scorer le fichier").click(
|
|
|
|
| 725 |
outputs=file_output,
|
| 726 |
)
|
| 727 |
|
| 728 |
+
with gr.Tab("Fichiers non-mergés"):
|
| 729 |
+
gr.Markdown(
|
| 730 |
+
"Téléversez directement les trois fichiers bruts (SIRH, évaluation, sondage). "
|
| 731 |
+
"L'application reproduira automatiquement la fusion puis le scoring."
|
| 732 |
+
)
|
| 733 |
+
sirh_input = gr.File(file_types=[".csv"], label="Fichier SIRH")
|
| 734 |
+
evaluation_input = gr.File(file_types=[".csv"], label="Fichier Ăvaluation")
|
| 735 |
+
sond_input = gr.File(file_types=[".csv"], label="Fichier Sondage")
|
| 736 |
+
raw_output = gr.Dataframe(label="Résultats fusion automatique", type="pandas")
|
| 737 |
+
gr.Button("Fusionner et scorer").click(
|
| 738 |
+
fn=score_raw_files,
|
| 739 |
+
inputs=[sirh_input, evaluation_input, sond_input],
|
| 740 |
+
outputs=raw_output,
|
| 741 |
+
)
|
| 742 |
+
|
| 743 |
|
| 744 |
if __name__ == "__main__":
|
| 745 |
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
name:
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
|
@@ -19,12 +19,16 @@ jobs:
|
|
| 19 |
- name: Setup Python
|
| 20 |
uses: actions/setup-python@v5
|
| 21 |
with:
|
| 22 |
-
python-version: "3.
|
| 23 |
|
| 24 |
- name: Install dependencies
|
| 25 |
run: |
|
| 26 |
python -m pip install --upgrade pip
|
| 27 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
- name: Deploy to Hugging Face Space
|
| 30 |
env:
|
|
@@ -33,7 +37,7 @@ jobs:
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
-
rsync -av --exclude '.git' --exclude '
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
|
|
|
| 1 |
+
name: Deploiement vers Hugging Face Spaces
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
|
|
|
| 19 |
- name: Setup Python
|
| 20 |
uses: actions/setup-python@v5
|
| 21 |
with:
|
| 22 |
+
python-version: "3.11"
|
| 23 |
|
| 24 |
- name: Install dependencies
|
| 25 |
run: |
|
| 26 |
python -m pip install --upgrade pip
|
| 27 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 28 |
+
pip install -e .
|
| 29 |
+
|
| 30 |
+
- name: Préparer les données et le modÚle
|
| 31 |
+
run: python main.py
|
| 32 |
|
| 33 |
- name: Deploy to Hugging Face Space
|
| 34 |
env:
|
|
|
|
| 37 |
git config --global user.email "actions@github.com"
|
| 38 |
git config --global user.name "GitHub Actions"
|
| 39 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 40 |
+
rsync -av --exclude '.git' --exclude 'docs' ./ hf_space/
|
| 41 |
cd hf_space
|
| 42 |
git add .
|
| 43 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/static.yml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploiement de la documentation
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: ["main"]
|
| 6 |
+
workflow_dispatch:
|
| 7 |
+
|
| 8 |
+
permissions:
|
| 9 |
+
contents: read
|
| 10 |
+
pages: write
|
| 11 |
+
id-token: write
|
| 12 |
+
|
| 13 |
+
concurrency:
|
| 14 |
+
group: "pages"
|
| 15 |
+
cancel-in-progress: false
|
| 16 |
+
|
| 17 |
+
jobs:
|
| 18 |
+
deploy:
|
| 19 |
+
environment:
|
| 20 |
+
name: github-pages
|
| 21 |
+
url: ${{ steps.deployment.outputs.page_url }}
|
| 22 |
+
runs-on: ubuntu-latest
|
| 23 |
+
steps:
|
| 24 |
+
- name: Checkout
|
| 25 |
+
uses: actions/checkout@v4
|
| 26 |
+
|
| 27 |
+
- name: Setup Pages
|
| 28 |
+
uses: actions/configure-pages@v5
|
| 29 |
+
|
| 30 |
+
- name: Upload artifact
|
| 31 |
+
uses: actions/upload-pages-artifact@v3
|
| 32 |
+
with:
|
| 33 |
+
path: 'docs/site'
|
| 34 |
+
|
| 35 |
+
- name: Deploy to GitHub Pages
|
| 36 |
+
id: deployment
|
| 37 |
+
uses: actions/deploy-pages@v4
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -1,19 +1,18 @@
|
|
| 1 |
-
#
|
| 2 |
-
/data/
|
| 3 |
-
|
| 4 |
-
# Mac OS-specific storage files
|
| 5 |
.DS_Store
|
| 6 |
*.code-workspace
|
| 7 |
*.pdf
|
| 8 |
/output/
|
| 9 |
questions.md
|
| 10 |
-
|
| 11 |
-
|
|
|
|
| 12 |
|
| 13 |
# vim
|
| 14 |
*.swp
|
| 15 |
*.swo
|
| 16 |
|
|
|
|
| 17 |
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 18 |
|
| 19 |
# Byte-compiled / optimized / DLL files
|
|
@@ -86,9 +85,6 @@ instance/
|
|
| 86 |
# Scrapy stuff:
|
| 87 |
.scrapy
|
| 88 |
|
| 89 |
-
# MkDocs documentation
|
| 90 |
-
docs/site/
|
| 91 |
-
|
| 92 |
# PyBuilder
|
| 93 |
.pybuilder/
|
| 94 |
target/
|
|
@@ -166,9 +162,6 @@ venv.bak/
|
|
| 166 |
# Rope project settings
|
| 167 |
.ropeproject
|
| 168 |
|
| 169 |
-
# mkdocs documentation
|
| 170 |
-
/site
|
| 171 |
-
|
| 172 |
# mypy
|
| 173 |
.mypy_cache/
|
| 174 |
.dmypy.json
|
|
|
|
| 1 |
+
# Spécifique à ce projet
|
|
|
|
|
|
|
|
|
|
| 2 |
.DS_Store
|
| 3 |
*.code-workspace
|
| 4 |
*.pdf
|
| 5 |
/output/
|
| 6 |
questions.md
|
| 7 |
+
/reports/
|
| 8 |
+
/data/
|
| 9 |
+
runtime.txt
|
| 10 |
|
| 11 |
# vim
|
| 12 |
*.swp
|
| 13 |
*.swo
|
| 14 |
|
| 15 |
+
|
| 16 |
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 17 |
|
| 18 |
# Byte-compiled / optimized / DLL files
|
|
|
|
| 85 |
# Scrapy stuff:
|
| 86 |
.scrapy
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
# PyBuilder
|
| 89 |
.pybuilder/
|
| 90 |
target/
|
|
|
|
| 162 |
# Rope project settings
|
| 163 |
.ropeproject
|
| 164 |
|
|
|
|
|
|
|
|
|
|
| 165 |
# mypy
|
| 166 |
.mypy_cache/
|
| 167 |
.dmypy.json
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -8,15 +8,17 @@ sdk_version: 5.49.1
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: true
|
| 10 |
short_description: Projet 05 formation Openclassrooms
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
-
# projet_05
|
| 14 |
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
| 18 |
|
| 19 |
-
|
|
|
|
| 20 |
|
| 21 |
## Organisation du projet
|
| 22 |
|
|
@@ -305,10 +307,6 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 305 |
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 306 |
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 307 |
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
| 308 |
-
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 309 |
-
[license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
|
| 310 |
-
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 311 |
-
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 312 |
[product-screenshot]: images/screenshot.png
|
| 313 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 314 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
@@ -328,9 +326,15 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 328 |
[Bootstrap-url]: https://getbootstrap.com
|
| 329 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 330 |
[JQuery-url]: https://jquery.com
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
<!-- TODO: -->
|
| 332 |
-
[
|
| 333 |
-
[
|
| 334 |
-
[
|
|
|
|
| 335 |
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 336 |
[](#)
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: true
|
| 10 |
short_description: Projet 05 formation Openclassrooms
|
| 11 |
+
python_version: 3.11
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# projet_05 : Déployez un modÚle de Machine Learning
|
| 15 |
|
| 16 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 17 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 18 |
</a>
|
| 19 |
|
| 20 |
+
[![mkdocs-shield]][mkdocs-url]
|
| 21 |
+
|
| 22 |
|
| 23 |
## Organisation du projet
|
| 24 |
|
|
|
|
| 307 |
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 308 |
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 309 |
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
[product-screenshot]: images/screenshot.png
|
| 311 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 312 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
|
|
| 326 |
[Bootstrap-url]: https://getbootstrap.com
|
| 327 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 328 |
[JQuery-url]: https://jquery.com
|
| 329 |
+
<!-- OK -->
|
| 330 |
+
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 331 |
+
[license-url]: https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE
|
| 332 |
+
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 333 |
+
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 334 |
<!-- TODO: -->
|
| 335 |
+
[postgres-shield]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
|
| 336 |
+
[python-shield]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
|
| 337 |
+
[mkdocs-shield]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
|
| 338 |
+
[mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
|
| 339 |
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
| 340 |
[](#)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -17,12 +17,30 @@ SCHEMA_PATH = Path("data/processed/schema.json")
|
|
| 17 |
|
| 18 |
|
| 19 |
def _load_schema(path: Path) -> dict[str, Any]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
if not path.exists():
|
| 21 |
return {}
|
| 22 |
return json.loads(path.read_text(encoding="utf-8"))
|
| 23 |
|
| 24 |
|
| 25 |
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
if schema:
|
| 27 |
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
|
| 28 |
if candidates:
|
|
@@ -37,6 +55,18 @@ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
|
| 37 |
|
| 38 |
|
| 39 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
if isinstance(payload, pd.DataFrame):
|
| 41 |
df = payload.copy()
|
| 42 |
elif payload is None:
|
|
@@ -50,6 +80,7 @@ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
|
| 50 |
|
| 51 |
|
| 52 |
def _ensure_model():
|
|
|
|
| 53 |
if PIPELINE is None:
|
| 54 |
raise gr.Error(
|
| 55 |
"Aucun modÚle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
|
|
@@ -57,6 +88,7 @@ def _ensure_model():
|
|
| 57 |
|
| 58 |
|
| 59 |
def score_table(table):
|
|
|
|
| 60 |
_ensure_model()
|
| 61 |
df = _convert_input(table, FEATURE_ORDER)
|
| 62 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
|
@@ -70,6 +102,7 @@ def score_table(table):
|
|
| 70 |
|
| 71 |
|
| 72 |
def score_csv(upload):
|
|
|
|
| 73 |
_ensure_model()
|
| 74 |
if upload is None:
|
| 75 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
|
@@ -85,6 +118,7 @@ def score_csv(upload):
|
|
| 85 |
|
| 86 |
|
| 87 |
def predict_from_form(*values):
|
|
|
|
| 88 |
_ensure_model()
|
| 89 |
if not FEATURE_ORDER:
|
| 90 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
|
@@ -132,7 +166,7 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
|
| 132 |
|
| 133 |
if PIPELINE is None:
|
| 134 |
gr.Markdown(
|
| 135 |
-
"
|
| 136 |
)
|
| 137 |
else:
|
| 138 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def _load_schema(path: Path) -> dict[str, Any]:
|
| 20 |
+
"""Load the schema definition stored as JSON.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
path: Path to the schema.json file.
|
| 24 |
+
|
| 25 |
+
Returns:
|
| 26 |
+
A dictionary describing the schema or an empty dict if the file is missing.
|
| 27 |
+
"""
|
| 28 |
if not path.exists():
|
| 29 |
return {}
|
| 30 |
return json.loads(path.read_text(encoding="utf-8"))
|
| 31 |
|
| 32 |
|
| 33 |
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
| 34 |
+
"""Infer the ordered list of features expected by the model.
|
| 35 |
+
|
| 36 |
+
Args:
|
| 37 |
+
metadata: Metadata produced during training.
|
| 38 |
+
schema: Schema derived from `features.py`.
|
| 39 |
+
pipeline: Loaded sklearn pipeline (optional).
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
List of feature names in the order expected by the model.
|
| 43 |
+
"""
|
| 44 |
if schema:
|
| 45 |
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
|
| 46 |
if candidates:
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 58 |
+
"""Normalize any user input into a validated DataFrame.
|
| 59 |
+
|
| 60 |
+
Args:
|
| 61 |
+
payload: Raw table coming from Gradio (DataFrame, list, etc.).
|
| 62 |
+
headers: Expected column names.
|
| 63 |
+
|
| 64 |
+
Returns:
|
| 65 |
+
A sanitized DataFrame.
|
| 66 |
+
|
| 67 |
+
Raises:
|
| 68 |
+
gr.Error: If no valid row is provided.
|
| 69 |
+
"""
|
| 70 |
if isinstance(payload, pd.DataFrame):
|
| 71 |
df = payload.copy()
|
| 72 |
elif payload is None:
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
def _ensure_model():
|
| 83 |
+
"""Ensure that a pipeline has been loaded before inference."""
|
| 84 |
if PIPELINE is None:
|
| 85 |
raise gr.Error(
|
| 86 |
"Aucun modÚle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
|
|
|
|
| 88 |
|
| 89 |
|
| 90 |
def score_table(table):
|
| 91 |
+
"""Score data entered via the interactive table."""
|
| 92 |
_ensure_model()
|
| 93 |
df = _convert_input(table, FEATURE_ORDER)
|
| 94 |
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def score_csv(upload):
|
| 105 |
+
"""Score a CSV uploaded by the user."""
|
| 106 |
_ensure_model()
|
| 107 |
if upload is None:
|
| 108 |
raise gr.Error("Veuillez déposer un fichier CSV.")
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
def predict_from_form(*values):
|
| 121 |
+
"""Score a single row coming from the form tab."""
|
| 122 |
_ensure_model()
|
| 123 |
if not FEATURE_ORDER:
|
| 124 |
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
|
|
|
| 166 |
|
| 167 |
if PIPELINE is None:
|
| 168 |
gr.Markdown(
|
| 169 |
+
"**Aucun modĂšle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
|
| 170 |
)
|
| 171 |
else:
|
| 172 |
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -4,6 +4,11 @@
|
|
| 4 |
# Mac OS-specific storage files
|
| 5 |
.DS_Store
|
| 6 |
*.code-workspace
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# vim
|
| 9 |
*.swp
|
|
|
|
| 4 |
# Mac OS-specific storage files
|
| 5 |
.DS_Store
|
| 6 |
*.code-workspace
|
| 7 |
+
*.pdf
|
| 8 |
+
/output/
|
| 9 |
+
questions.md
|
| 10 |
+
*.pdf
|
| 11 |
+
|
| 12 |
|
| 13 |
# vim
|
| 14 |
*.swp
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
# projet_05
|
| 2 |
-
|
| 3 |
---
|
| 4 |
title: OCR_Projet05
|
| 5 |
emoji: đ„
|
|
@@ -12,6 +10,8 @@ pinned: true
|
|
| 12 |
short_description: Projet 05 formation Openclassrooms
|
| 13 |
---
|
| 14 |
|
|
|
|
|
|
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
|
@@ -76,17 +76,6 @@ Déployez un modÚle de Machine Learning
|
|
| 76 |
|
| 77 |
--------
|
| 78 |
|
| 79 |
-
---
|
| 80 |
-
title: Projet 05
|
| 81 |
-
emoji: đ
|
| 82 |
-
colorFrom: indigo
|
| 83 |
-
colorTo: green
|
| 84 |
-
sdk: gradio
|
| 85 |
-
sdk_version: 5.49.1
|
| 86 |
-
app_file: app.py
|
| 87 |
-
pinned: false
|
| 88 |
-
---
|
| 89 |
-
|
| 90 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 91 |
|
| 92 |
<!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
|
|
@@ -99,8 +88,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 99 |
*** Thanks again! Now go create something AMAZING! :D
|
| 100 |
-->
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
<!-- PROJECT SHIELDS -->
|
| 105 |
<!--
|
| 106 |
*** I'm using markdown "reference style" links for readability.
|
|
@@ -118,8 +105,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 118 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 119 |

|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
<!-- PROJECT LOGO -->
|
| 124 |
<br />
|
| 125 |
<div align="center">
|
|
@@ -143,8 +128,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 143 |
</p>
|
| 144 |
</div>
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
<!-- TABLE OF CONTENTS -->
|
| 149 |
<details>
|
| 150 |
<summary>Table of Contents</summary>
|
|
@@ -191,8 +174,6 @@ Here's a blank template to get started. To avoid retyping too much info, do a se
|
|
| 191 |
|
| 192 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
<!-- GETTING STARTED -->
|
| 197 |
## Getting Started
|
| 198 |
|
|
@@ -212,20 +193,19 @@ This is an example of how to list things you need to use the software and how to
|
|
| 212 |
pip install -r requirements.txt
|
| 213 |
uvicorn app.main:app --reload
|
| 214 |
|
| 215 |
-
1.
|
| 216 |
-
2. Clone the repo
|
| 217 |
```sh
|
| 218 |
-
git clone https://github.com/
|
| 219 |
```
|
| 220 |
-
|
| 221 |
```sh
|
| 222 |
npm install
|
| 223 |
```
|
| 224 |
-
|
| 225 |
```js
|
| 226 |
const API_KEY = 'ENTER YOUR API';
|
| 227 |
```
|
| 228 |
-
|
| 229 |
```sh
|
| 230 |
git remote set-url origin github_username/repo_name
|
| 231 |
git remote -v # confirm the changes
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: OCR_Projet05
|
| 3 |
emoji: đ„
|
|
|
|
| 10 |
short_description: Projet 05 formation Openclassrooms
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# projet_05
|
| 14 |
+
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
|
|
|
| 76 |
|
| 77 |
--------
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 80 |
|
| 81 |
<!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
|
|
|
|
| 88 |
*** Thanks again! Now go create something AMAZING! :D
|
| 89 |
-->
|
| 90 |
|
|
|
|
|
|
|
| 91 |
<!-- PROJECT SHIELDS -->
|
| 92 |
<!--
|
| 93 |
*** I'm using markdown "reference style" links for readability.
|
|
|
|
| 105 |
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 106 |

|
| 107 |
|
|
|
|
|
|
|
| 108 |
<!-- PROJECT LOGO -->
|
| 109 |
<br />
|
| 110 |
<div align="center">
|
|
|
|
| 128 |
</p>
|
| 129 |
</div>
|
| 130 |
|
|
|
|
|
|
|
| 131 |
<!-- TABLE OF CONTENTS -->
|
| 132 |
<details>
|
| 133 |
<summary>Table of Contents</summary>
|
|
|
|
| 174 |
|
| 175 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 176 |
|
|
|
|
|
|
|
| 177 |
<!-- GETTING STARTED -->
|
| 178 |
## Getting Started
|
| 179 |
|
|
|
|
| 193 |
pip install -r requirements.txt
|
| 194 |
uvicorn app.main:app --reload
|
| 195 |
|
| 196 |
+
1. Clone the repo
|
|
|
|
| 197 |
```sh
|
| 198 |
+
git clone https://github.com/stephmnt/OCR_Projet05.git
|
| 199 |
```
|
| 200 |
+
2. Install NPM packages
|
| 201 |
```sh
|
| 202 |
npm install
|
| 203 |
```
|
| 204 |
+
3. Enter your API in `config.js`
|
| 205 |
```js
|
| 206 |
const API_KEY = 'ENTER YOUR API';
|
| 207 |
```
|
| 208 |
+
4. Change git remote url to avoid accidental pushes to base project
|
| 209 |
```sh
|
| 210 |
git remote set-url origin github_username/repo_name
|
| 211 |
git remote -v # confirm the changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
CHANGED
|
@@ -33,8 +33,8 @@ jobs:
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
-
rsync -av --exclude '.git' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
-
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
|
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
+
rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
| 1 |
# projet_05
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 4 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 5 |
</a>
|
|
@@ -57,6 +69,11 @@ Déployez un modÚle de Machine Learning
|
|
| 57 |
âââ plots.py <- Code to create visualizations
|
| 58 |
```
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
--------
|
| 61 |
|
| 62 |
---
|
|
@@ -93,6 +110,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 93 |
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 94 |
-->
|
| 95 |
[![Contributors][contributors-shield]][contributors-url]
|
|
|
|
| 96 |
[![Forks][forks-shield]][forks-url]
|
| 97 |
[![Stargazers][stars-shield]][stars-url]
|
| 98 |
[![Issues][issues-shield]][issues-url]
|
|
@@ -236,7 +254,7 @@ _For more examples, please refer to the [Documentation](https://example.com)_
|
|
| 236 |
- [ ] Feature 3
|
| 237 |
- [ ] Nested Feature
|
| 238 |
|
| 239 |
-
See the [open issues](https://github.com/
|
| 240 |
|
| 241 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 242 |
|
|
@@ -299,18 +317,18 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 299 |
|
| 300 |
<!-- MARKDOWN LINKS & IMAGES -->
|
| 301 |
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 302 |
-
[contributors-shield]: https://img.shields.io/github/contributors/
|
| 303 |
-
[contributors-url]: https://github.com/
|
| 304 |
-
[forks-shield]: https://img.shields.io/github/forks/
|
| 305 |
-
[forks-url]: https://github.com/
|
| 306 |
-
[stars-shield]: https://img.shields.io/github/stars/
|
| 307 |
-
[stars-url]: https://github.com/
|
| 308 |
-
[issues-shield]: https://img.shields.io/github/issues/
|
| 309 |
-
[issues-url]: https://github.com/
|
| 310 |
-
[license-shield]: https://img.shields.io/github/license/
|
| 311 |
-
[license-url]: https://github.com/
|
| 312 |
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 313 |
-
[linkedin-url]: https://linkedin.com/in/
|
| 314 |
[product-screenshot]: images/screenshot.png
|
| 315 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 316 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
@@ -331,10 +349,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 331 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 332 |
[JQuery-url]: https://jquery.com
|
| 333 |
<!-- TODO: -->
|
| 334 |
-
[
|
| 335 |
-
[
|
| 336 |
-
[
|
| 337 |
-
[
|
| 338 |
-
[](#)
|
| 339 |
[](#)
|
| 340 |
-
[](#)[text](../projet_04/.gitignore)
|
|
|
|
| 1 |
# projet_05
|
| 2 |
|
| 3 |
+
---
|
| 4 |
+
title: OCR_Projet05
|
| 5 |
+
emoji: đ„
|
| 6 |
+
colorFrom: purple
|
| 7 |
+
colorTo: purple
|
| 8 |
+
sdk: gradio
|
| 9 |
+
sdk_version: 5.49.1
|
| 10 |
+
app_file: app.py
|
| 11 |
+
pinned: true
|
| 12 |
+
short_description: Projet 05 formation Openclassrooms
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
|
|
|
| 69 |
âââ plots.py <- Code to create visualizations
|
| 70 |
```
|
| 71 |
|
| 72 |
+
## Code hérité réutilisé
|
| 73 |
+
|
| 74 |
+
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thÚme.
|
| 75 |
+
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 76 |
+
|
| 77 |
--------
|
| 78 |
|
| 79 |
---
|
|
|
|
| 110 |
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 111 |
-->
|
| 112 |
[![Contributors][contributors-shield]][contributors-url]
|
| 113 |
+
[![Python][python]][python]
|
| 114 |
[![Forks][forks-shield]][forks-url]
|
| 115 |
[![Stargazers][stars-shield]][stars-url]
|
| 116 |
[![Issues][issues-shield]][issues-url]
|
|
|
|
| 254 |
- [ ] Feature 3
|
| 255 |
- [ ] Nested Feature
|
| 256 |
|
| 257 |
+
See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
|
| 258 |
|
| 259 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 260 |
|
|
|
|
| 317 |
|
| 318 |
<!-- MARKDOWN LINKS & IMAGES -->
|
| 319 |
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 320 |
+
[contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 321 |
+
[contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
|
| 322 |
+
[forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 323 |
+
[forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
|
| 324 |
+
[stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 325 |
+
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 326 |
+
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 327 |
+
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
| 328 |
+
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 329 |
+
[license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
|
| 330 |
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 331 |
+
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 332 |
[product-screenshot]: images/screenshot.png
|
| 333 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 334 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
|
|
| 349 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 350 |
[JQuery-url]: https://jquery.com
|
| 351 |
<!-- TODO: -->
|
| 352 |
+
[Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
|
| 353 |
+
[Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
|
| 354 |
+
[MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
|
| 355 |
+
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
|
|
|
| 356 |
[](#)
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
The MIT License (MIT)
|
| 3 |
+
Copyright (c) 2025, Stéphane Manet
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
| 6 |
+
|
| 7 |
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
| 8 |
+
|
| 9 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 10 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#################################################################################
|
| 2 |
+
# GLOBALS #
|
| 3 |
+
#################################################################################
|
| 4 |
+
|
| 5 |
+
PROJECT_NAME = OCR_projet05
|
| 6 |
+
PYTHON_VERSION = 3.10
|
| 7 |
+
PYTHON_INTERPRETER = python
|
| 8 |
+
|
| 9 |
+
#################################################################################
|
| 10 |
+
# COMMANDS #
|
| 11 |
+
#################################################################################
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
## Install Python dependencies
|
| 15 |
+
.PHONY: requirements
|
| 16 |
+
requirements:
|
| 17 |
+
pip install -e .
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
## Delete all compiled Python files
|
| 23 |
+
.PHONY: clean
|
| 24 |
+
clean:
|
| 25 |
+
find . -type f -name "*.py[co]" -delete
|
| 26 |
+
find . -type d -name "__pycache__" -delete
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
## Lint using ruff (use `make format` to do formatting)
|
| 30 |
+
.PHONY: lint
|
| 31 |
+
lint:
|
| 32 |
+
ruff format --check
|
| 33 |
+
ruff check
|
| 34 |
+
|
| 35 |
+
## Format source code with ruff
|
| 36 |
+
.PHONY: format
|
| 37 |
+
format:
|
| 38 |
+
ruff check --fix
|
| 39 |
+
ruff format
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
## Run tests
|
| 44 |
+
.PHONY: test
|
| 45 |
+
test:
|
| 46 |
+
python -m pytest tests
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
## Set up Python interpreter environment
|
| 50 |
+
.PHONY: create_environment
|
| 51 |
+
create_environment:
|
| 52 |
+
@bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
|
| 53 |
+
@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
#################################################################################
|
| 59 |
+
# PROJECT RULES #
|
| 60 |
+
#################################################################################
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
## Make dataset
|
| 64 |
+
.PHONY: data
|
| 65 |
+
data: requirements
|
| 66 |
+
$(PYTHON_INTERPRETER) projet_05/dataset.py
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
#################################################################################
|
| 70 |
+
# Self Documenting Commands #
|
| 71 |
+
#################################################################################
|
| 72 |
+
|
| 73 |
+
.DEFAULT_GOAL := help
|
| 74 |
+
|
| 75 |
+
define PRINT_HELP_PYSCRIPT
|
| 76 |
+
import re, sys; \
|
| 77 |
+
lines = '\n'.join([line for line in sys.stdin]); \
|
| 78 |
+
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
|
| 79 |
+
print('Available rules:\n'); \
|
| 80 |
+
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
|
| 81 |
+
endef
|
| 82 |
+
export PRINT_HELP_PYSCRIPT
|
| 83 |
+
|
| 84 |
+
help:
|
| 85 |
+
@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -1,7 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
def greet(name):
|
| 4 |
-
return "Hello " + name + "!!"
|
| 5 |
|
| 6 |
-
|
| 7 |
-
demo.launch()
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
import gradio as gr
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from loguru import logger
|
| 10 |
+
|
| 11 |
+
from projet_05.branding import apply_brand_theme
|
| 12 |
+
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
|
| 13 |
+
|
| 14 |
+
MODEL_PATH = Path("models/best_model.joblib")
|
| 15 |
+
METADATA_PATH = Path("models/best_model_meta.json")
|
| 16 |
+
SCHEMA_PATH = Path("data/processed/schema.json")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _load_schema(path: Path) -> dict[str, Any]:
|
| 20 |
+
if not path.exists():
|
| 21 |
+
return {}
|
| 22 |
+
return json.loads(path.read_text(encoding="utf-8"))
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
| 26 |
+
if schema:
|
| 27 |
+
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
|
| 28 |
+
if candidates:
|
| 29 |
+
return candidates
|
| 30 |
+
features = metadata.get("features", {})
|
| 31 |
+
explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
|
| 32 |
+
if explicit:
|
| 33 |
+
return explicit
|
| 34 |
+
if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
|
| 35 |
+
return list(pipeline.feature_names_in_)
|
| 36 |
+
return []
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 40 |
+
if isinstance(payload, pd.DataFrame):
|
| 41 |
+
df = payload.copy()
|
| 42 |
+
elif payload is None:
|
| 43 |
+
df = pd.DataFrame(columns=headers)
|
| 44 |
+
else:
|
| 45 |
+
df = pd.DataFrame(payload, columns=headers if headers else None)
|
| 46 |
+
df = df.dropna(how="all")
|
| 47 |
+
if df.empty:
|
| 48 |
+
raise gr.Error("Merci de saisir au moins une ligne complĂšte.")
|
| 49 |
+
return df
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _ensure_model():
|
| 53 |
+
if PIPELINE is None:
|
| 54 |
+
raise gr.Error(
|
| 55 |
+
"Aucun modÚle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def score_table(table):
|
| 60 |
+
_ensure_model()
|
| 61 |
+
df = _convert_input(table, FEATURE_ORDER)
|
| 62 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 63 |
+
return run_inference(
|
| 64 |
+
df,
|
| 65 |
+
PIPELINE,
|
| 66 |
+
THRESHOLD,
|
| 67 |
+
drop_columns=drop_cols,
|
| 68 |
+
required_features=FEATURE_ORDER or None,
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def score_csv(upload):
|
| 73 |
+
_ensure_model()
|
| 74 |
+
if upload is None:
|
| 75 |
+
raise gr.Error("Veuillez déposer un fichier CSV.")
|
| 76 |
+
df = pd.read_csv(upload.name)
|
| 77 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 78 |
+
return run_inference(
|
| 79 |
+
df,
|
| 80 |
+
PIPELINE,
|
| 81 |
+
THRESHOLD,
|
| 82 |
+
drop_columns=drop_cols,
|
| 83 |
+
required_features=FEATURE_ORDER or None,
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def predict_from_form(*values):
|
| 88 |
+
_ensure_model()
|
| 89 |
+
if not FEATURE_ORDER:
|
| 90 |
+
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
| 91 |
+
payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
|
| 92 |
+
df = pd.DataFrame([payload])
|
| 93 |
+
scored = run_inference(
|
| 94 |
+
df,
|
| 95 |
+
PIPELINE,
|
| 96 |
+
THRESHOLD,
|
| 97 |
+
required_features=FEATURE_ORDER or None,
|
| 98 |
+
)
|
| 99 |
+
row = scored.iloc[0]
|
| 100 |
+
label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
|
| 101 |
+
return {
|
| 102 |
+
"probability": round(float(row["proba_depart"]), 4),
|
| 103 |
+
"decision": label,
|
| 104 |
+
"threshold": THRESHOLD,
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Chargement des artéfacts
|
| 109 |
+
apply_brand_theme()
|
| 110 |
+
|
| 111 |
+
PIPELINE = None
|
| 112 |
+
METADATA: dict[str, Any] = {}
|
| 113 |
+
THRESHOLD = 0.5
|
| 114 |
+
TARGET_COLUMN: str | None = None
|
| 115 |
+
SCHEMA = _load_schema(SCHEMA_PATH)
|
| 116 |
+
|
| 117 |
+
try:
|
| 118 |
+
PIPELINE = load_pipeline(MODEL_PATH)
|
| 119 |
+
METADATA = load_metadata(METADATA_PATH)
|
| 120 |
+
THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
|
| 121 |
+
TARGET_COLUMN = METADATA.get("target")
|
| 122 |
+
except FileNotFoundError as exc:
|
| 123 |
+
logger.warning("Artéfact manquant: {}", exc)
|
| 124 |
+
|
| 125 |
+
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
|
| 126 |
+
|
| 127 |
+
with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
| 128 |
+
gr.Markdown("# API Gradio â PrĂ©diction de dĂ©part employĂ©")
|
| 129 |
+
gr.Markdown(
|
| 130 |
+
"Le modÚle applique le pipeline entraßné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
if PIPELINE is None:
|
| 134 |
+
gr.Markdown(
|
| 135 |
+
"â ïž **Aucun modĂšle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
|
| 136 |
+
)
|
| 137 |
+
else:
|
| 138 |
+
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
| 139 |
+
|
| 140 |
+
with gr.Tab("Formulaire unitaire"):
|
| 141 |
+
if not FEATURE_ORDER:
|
| 142 |
+
gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
|
| 143 |
+
else:
|
| 144 |
+
form_inputs: list[gr.components.Component] = [] # type: ignore
|
| 145 |
+
for feature in FEATURE_ORDER:
|
| 146 |
+
form_inputs.append(
|
| 147 |
+
gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
|
| 148 |
+
)
|
| 149 |
+
form_output = gr.JSON(label="Résultat")
|
| 150 |
+
gr.Button("Prédire").click(
|
| 151 |
+
fn=predict_from_form,
|
| 152 |
+
inputs=form_inputs,
|
| 153 |
+
outputs=form_output,
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
with gr.Tab("Tableau interactif"):
|
| 157 |
+
table_input = gr.Dataframe(
|
| 158 |
+
headers=FEATURE_ORDER if FEATURE_ORDER else None,
|
| 159 |
+
row_count=(1, "dynamic"),
|
| 160 |
+
col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
|
| 161 |
+
type="pandas",
|
| 162 |
+
)
|
| 163 |
+
table_output = gr.Dataframe(label="Prédictions", type="pandas")
|
| 164 |
+
gr.Button("Scorer les lignes").click(
|
| 165 |
+
fn=score_table,
|
| 166 |
+
inputs=table_input,
|
| 167 |
+
outputs=table_output,
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
with gr.Tab("Fichier CSV"):
|
| 171 |
+
file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
|
| 172 |
+
file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
|
| 173 |
+
gr.Button("Scorer le fichier").click(
|
| 174 |
+
fn=score_csv,
|
| 175 |
+
inputs=file_input,
|
| 176 |
+
outputs=file_output,
|
| 177 |
+
)
|
| 178 |
|
|
|
|
|
|
|
| 179 |
|
| 180 |
+
if __name__ == "__main__":
|
| 181 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
CHANGED
|
@@ -1,10 +1,13 @@
|
|
| 1 |
-
name:
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
- main
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
jobs:
|
| 9 |
deploy:
|
| 10 |
runs-on: ubuntu-latest
|
|
@@ -23,7 +26,7 @@ jobs:
|
|
| 23 |
python -m pip install --upgrade pip
|
| 24 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 25 |
|
| 26 |
-
- name:
|
| 27 |
env:
|
| 28 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 29 |
run: |
|
|
@@ -33,5 +36,5 @@ jobs:
|
|
| 33 |
rsync -av --exclude '.git' ./ hf_space/
|
| 34 |
cd hf_space
|
| 35 |
git add .
|
| 36 |
-
git commit -m "đ Auto-deploy from GitHub Actions"
|
| 37 |
-
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
|
|
|
| 1 |
+
name: Deploy to Hugging Face Spaces
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
- main
|
| 7 |
|
| 8 |
+
permissions:
|
| 9 |
+
contents: write
|
| 10 |
+
|
| 11 |
jobs:
|
| 12 |
deploy:
|
| 13 |
runs-on: ubuntu-latest
|
|
|
|
| 26 |
python -m pip install --upgrade pip
|
| 27 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 28 |
|
| 29 |
+
- name: Deploy to Hugging Face Space
|
| 30 |
env:
|
| 31 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 32 |
run: |
|
|
|
|
| 36 |
rsync -av --exclude '.git' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
+
git commit -m "đ Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -1,2 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.code-workspace
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data
|
| 2 |
+
/data/
|
| 3 |
+
|
| 4 |
+
# Mac OS-specific storage files
|
| 5 |
+
.DS_Store
|
| 6 |
*.code-workspace
|
| 7 |
+
|
| 8 |
+
# vim
|
| 9 |
+
*.swp
|
| 10 |
+
*.swo
|
| 11 |
+
|
| 12 |
+
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 13 |
+
|
| 14 |
+
# Byte-compiled / optimized / DLL files
|
| 15 |
+
__pycache__/
|
| 16 |
+
*.py[cod]
|
| 17 |
+
*$py.class
|
| 18 |
+
|
| 19 |
+
# C extensions
|
| 20 |
+
*.so
|
| 21 |
+
|
| 22 |
+
# Distribution / packaging
|
| 23 |
+
.Python
|
| 24 |
+
build/
|
| 25 |
+
develop-eggs/
|
| 26 |
+
dist/
|
| 27 |
+
downloads/
|
| 28 |
+
eggs/
|
| 29 |
+
.eggs/
|
| 30 |
+
lib/
|
| 31 |
+
lib64/
|
| 32 |
+
parts/
|
| 33 |
+
sdist/
|
| 34 |
+
var/
|
| 35 |
+
wheels/
|
| 36 |
+
share/python-wheels/
|
| 37 |
+
*.egg-info/
|
| 38 |
+
.installed.cfg
|
| 39 |
+
*.egg
|
| 40 |
+
MANIFEST
|
| 41 |
+
|
| 42 |
+
# PyInstaller
|
| 43 |
+
# Usually these files are written by a python script from a template
|
| 44 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 45 |
+
*.manifest
|
| 46 |
+
*.spec
|
| 47 |
+
|
| 48 |
+
# Installer logs
|
| 49 |
+
pip-log.txt
|
| 50 |
+
pip-delete-this-directory.txt
|
| 51 |
+
|
| 52 |
+
# Unit test / coverage reports
|
| 53 |
+
htmlcov/
|
| 54 |
+
.tox/
|
| 55 |
+
.nox/
|
| 56 |
+
.coverage
|
| 57 |
+
.coverage.*
|
| 58 |
+
.cache
|
| 59 |
+
nosetests.xml
|
| 60 |
+
coverage.xml
|
| 61 |
+
*.cover
|
| 62 |
+
*.py,cover
|
| 63 |
+
.hypothesis/
|
| 64 |
+
.pytest_cache/
|
| 65 |
+
cover/
|
| 66 |
+
|
| 67 |
+
# Translations
|
| 68 |
+
*.mo
|
| 69 |
+
*.pot
|
| 70 |
+
|
| 71 |
+
# Django stuff:
|
| 72 |
+
*.log
|
| 73 |
+
local_settings.py
|
| 74 |
+
db.sqlite3
|
| 75 |
+
db.sqlite3-journal
|
| 76 |
+
|
| 77 |
+
# Flask stuff:
|
| 78 |
+
instance/
|
| 79 |
+
.webassets-cache
|
| 80 |
+
|
| 81 |
+
# Scrapy stuff:
|
| 82 |
+
.scrapy
|
| 83 |
+
|
| 84 |
+
# MkDocs documentation
|
| 85 |
+
docs/site/
|
| 86 |
+
|
| 87 |
+
# PyBuilder
|
| 88 |
+
.pybuilder/
|
| 89 |
+
target/
|
| 90 |
+
|
| 91 |
+
# Jupyter Notebook
|
| 92 |
+
.ipynb_checkpoints
|
| 93 |
+
|
| 94 |
+
# IPython
|
| 95 |
+
profile_default/
|
| 96 |
+
ipython_config.py
|
| 97 |
+
|
| 98 |
+
# pyenv
|
| 99 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 100 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 101 |
+
# .python-version
|
| 102 |
+
|
| 103 |
+
# pipenv
|
| 104 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 105 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 106 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 107 |
+
# install all needed dependencies.
|
| 108 |
+
#Pipfile.lock
|
| 109 |
+
|
| 110 |
+
# UV
|
| 111 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 112 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 113 |
+
# commonly ignored for libraries.
|
| 114 |
+
#uv.lock
|
| 115 |
+
|
| 116 |
+
# poetry
|
| 117 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 118 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 119 |
+
# commonly ignored for libraries.
|
| 120 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 121 |
+
#poetry.lock
|
| 122 |
+
|
| 123 |
+
# pdm
|
| 124 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 125 |
+
#pdm.lock
|
| 126 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 127 |
+
# in version control.
|
| 128 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 129 |
+
.pdm.toml
|
| 130 |
+
.pdm-python
|
| 131 |
+
.pdm-build/
|
| 132 |
+
|
| 133 |
+
# pixi
|
| 134 |
+
# pixi.lock should be committed to version control for reproducibility
|
| 135 |
+
# .pixi/ contains the environments and should not be committed
|
| 136 |
+
.pixi/
|
| 137 |
+
|
| 138 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 139 |
+
__pypackages__/
|
| 140 |
+
|
| 141 |
+
# Celery stuff
|
| 142 |
+
celerybeat-schedule
|
| 143 |
+
celerybeat.pid
|
| 144 |
+
|
| 145 |
+
# SageMath parsed files
|
| 146 |
+
*.sage.py
|
| 147 |
+
|
| 148 |
+
# Environments
|
| 149 |
+
.env
|
| 150 |
+
.venv
|
| 151 |
+
env/
|
| 152 |
+
venv/
|
| 153 |
+
ENV/
|
| 154 |
+
env.bak/
|
| 155 |
+
venv.bak/
|
| 156 |
+
|
| 157 |
+
# Spyder project settings
|
| 158 |
+
.spyderproject
|
| 159 |
+
.spyproject
|
| 160 |
+
|
| 161 |
+
# Rope project settings
|
| 162 |
+
.ropeproject
|
| 163 |
+
|
| 164 |
+
# mkdocs documentation
|
| 165 |
+
/site
|
| 166 |
+
|
| 167 |
+
# mypy
|
| 168 |
+
.mypy_cache/
|
| 169 |
+
.dmypy.json
|
| 170 |
+
dmypy.json
|
| 171 |
+
|
| 172 |
+
# Pyre type checker
|
| 173 |
+
.pyre/
|
| 174 |
+
|
| 175 |
+
# pytype static type analyzer
|
| 176 |
+
.pytype/
|
| 177 |
+
|
| 178 |
+
# Cython debug symbols
|
| 179 |
+
cython_debug/
|
| 180 |
+
|
| 181 |
+
# PyCharm
|
| 182 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 183 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 184 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 185 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 186 |
+
#.idea/
|
| 187 |
+
|
| 188 |
+
# Ruff stuff:
|
| 189 |
+
.ruff_cache/
|
| 190 |
+
|
| 191 |
+
# PyPI configuration file
|
| 192 |
+
.pypirc
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,3 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Projet 05
|
| 3 |
emoji: đ
|
|
@@ -10,3 +71,270 @@ pinned: false
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# projet_05
|
| 2 |
+
|
| 3 |
+
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 4 |
+
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 5 |
+
</a>
|
| 6 |
+
|
| 7 |
+
Déployez un modÚle de Machine Learning
|
| 8 |
+
|
| 9 |
+
## Organisation du projet
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
âââ LICENSE <- Open-source license if one is chosen
|
| 13 |
+
âââ Makefile <- Makefile with convenience commands like `make data` or `make train`
|
| 14 |
+
âââ README.md <- The top-level README for developers using this project.
|
| 15 |
+
âââ data
|
| 16 |
+
â âââ external <- Data from third party sources.
|
| 17 |
+
â âââ interim <- Intermediate data that has been transformed.
|
| 18 |
+
â âââ processed <- The final, canonical data sets for modeling.
|
| 19 |
+
â âââ raw <- The original, immutable data dump.
|
| 20 |
+
â
|
| 21 |
+
âââ docs <- A default mkdocs project; see www.mkdocs.org for details
|
| 22 |
+
â
|
| 23 |
+
âââ models <- Trained and serialized models, model predictions, or model summaries
|
| 24 |
+
â
|
| 25 |
+
âââ notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
|
| 26 |
+
â the creator's initials, and a short `-` delimited description, e.g.
|
| 27 |
+
â `1.0-jqp-initial-data-exploration`.
|
| 28 |
+
â
|
| 29 |
+
âââ pyproject.toml <- Project configuration file with package metadata for
|
| 30 |
+
â projet_05 and configuration for tools like black
|
| 31 |
+
â
|
| 32 |
+
âââ references <- Data dictionaries, manuals, and all other explanatory materials.
|
| 33 |
+
â
|
| 34 |
+
âââ reports <- Generated analysis as HTML, PDF, LaTeX, etc.
|
| 35 |
+
â âââ figures <- Generated graphics and figures to be used in reporting
|
| 36 |
+
â
|
| 37 |
+
âââ requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
| 38 |
+
â generated with `pip freeze > requirements.txt`
|
| 39 |
+
â
|
| 40 |
+
âââ setup.cfg <- Configuration file for flake8
|
| 41 |
+
â
|
| 42 |
+
âââ projet_05 <- Source code for use in this project.
|
| 43 |
+
â
|
| 44 |
+
âââ __init__.py <- Makes projet_05 a Python module
|
| 45 |
+
â
|
| 46 |
+
âââ config.py <- Store useful variables and configuration
|
| 47 |
+
â
|
| 48 |
+
âââ dataset.py <- Scripts to download or generate data
|
| 49 |
+
â
|
| 50 |
+
âââ features.py <- Code to create features for modeling
|
| 51 |
+
â
|
| 52 |
+
âââ modeling
|
| 53 |
+
â âââ __init__.py
|
| 54 |
+
â âââ predict.py <- Code to run model inference with trained models
|
| 55 |
+
â âââ train.py <- Code to train models
|
| 56 |
+
â
|
| 57 |
+
âââ plots.py <- Code to create visualizations
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
--------
|
| 61 |
+
|
| 62 |
---
|
| 63 |
title: Projet 05
|
| 64 |
emoji: đ
|
|
|
|
| 71 |
---
|
| 72 |
|
| 73 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 74 |
+
|
| 75 |
+
<!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
|
| 76 |
+
<a id="readme-top"></a>
|
| 77 |
+
<!--
|
| 78 |
+
*** Thanks for checking out the Best-README-Template. If you have a suggestion
|
| 79 |
+
*** that would make this better, please fork the repo and create a pull request
|
| 80 |
+
*** or simply open an issue with the tag "enhancement".
|
| 81 |
+
*** Don't forget to give the project a star!
|
| 82 |
+
*** Thanks again! Now go create something AMAZING! :D
|
| 83 |
+
-->
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
<!-- PROJECT SHIELDS -->
|
| 88 |
+
<!--
|
| 89 |
+
*** I'm using markdown "reference style" links for readability.
|
| 90 |
+
*** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
|
| 91 |
+
*** See the bottom of this document for the declaration of the reference variables
|
| 92 |
+
*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
|
| 93 |
+
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 94 |
+
-->
|
| 95 |
+
[![Contributors][contributors-shield]][contributors-url]
|
| 96 |
+
[![Forks][forks-shield]][forks-url]
|
| 97 |
+
[![Stargazers][stars-shield]][stars-url]
|
| 98 |
+
[![Issues][issues-shield]][issues-url]
|
| 99 |
+
[![project_license][license-shield]][license-url]
|
| 100 |
+
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 101 |
+

|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
<!-- PROJECT LOGO -->
|
| 106 |
+
<br />
|
| 107 |
+
<div align="center">
|
| 108 |
+
<a href="https://github.com/github_username/repo_name">
|
| 109 |
+
<img src="images/logo.png" alt="Logo" width="80" height="80">
|
| 110 |
+
</a>
|
| 111 |
+
|
| 112 |
+
<h3 align="center">project_title</h3>
|
| 113 |
+
|
| 114 |
+
<p align="center">
|
| 115 |
+
project_description
|
| 116 |
+
<br />
|
| 117 |
+
<a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
|
| 118 |
+
<br />
|
| 119 |
+
<br />
|
| 120 |
+
<a href="https://github.com/github_username/repo_name">View Demo</a>
|
| 121 |
+
·
|
| 122 |
+
<a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
|
| 123 |
+
·
|
| 124 |
+
<a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
|
| 125 |
+
</p>
|
| 126 |
+
</div>
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
<!-- TABLE OF CONTENTS -->
|
| 131 |
+
<details>
|
| 132 |
+
<summary>Table of Contents</summary>
|
| 133 |
+
<ol>
|
| 134 |
+
<li>
|
| 135 |
+
<a href="#about-the-project">About The Project</a>
|
| 136 |
+
<ul>
|
| 137 |
+
<li><a href="#built-with">Built With</a></li>
|
| 138 |
+
</ul>
|
| 139 |
+
</li>
|
| 140 |
+
<li>
|
| 141 |
+
<a href="#getting-started">Getting Started</a>
|
| 142 |
+
<ul>
|
| 143 |
+
<li><a href="#prerequisites">Prerequisites</a></li>
|
| 144 |
+
<li><a href="#installation">Installation</a></li>
|
| 145 |
+
</ul>
|
| 146 |
+
</li>
|
| 147 |
+
<li><a href="#usage">Usage</a></li>
|
| 148 |
+
<li><a href="#roadmap">Roadmap</a></li>
|
| 149 |
+
<li><a href="#contributing">Contributing</a></li>
|
| 150 |
+
<li><a href="#license">License</a></li>
|
| 151 |
+
<li><a href="#contact">Contact</a></li>
|
| 152 |
+
<li><a href="#acknowledgments">Acknowledgments</a></li>
|
| 153 |
+
</ol>
|
| 154 |
+
</details>
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
<!-- ABOUT THE PROJECT -->
|
| 159 |
+
## About The Project
|
| 160 |
+
|
| 161 |
+
[![Product Name Screen Shot][product-screenshot]](https://example.com)
|
| 162 |
+
|
| 163 |
+
Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
|
| 164 |
+
|
| 165 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
### Built With
|
| 170 |
+
|
| 171 |
+
* [![Python][Python]][Python-url]
|
| 172 |
+
* [![SQL][SQL]][SQL-url]
|
| 173 |
+
|
| 174 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
<!-- GETTING STARTED -->
|
| 179 |
+
## Getting Started
|
| 180 |
+
|
| 181 |
+
This is an example of how you may give instructions on setting up your project locally.
|
| 182 |
+
To get a local copy up and running follow these simple example steps.
|
| 183 |
+
|
| 184 |
+
### Prerequisites
|
| 185 |
+
|
| 186 |
+
This is an example of how to list things you need to use the software and how to install them.
|
| 187 |
+
* npm
|
| 188 |
+
```sh
|
| 189 |
+
npm install npm@latest -g
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
### Installation
|
| 193 |
+
|
| 194 |
+
pip install -r requirements.txt
|
| 195 |
+
uvicorn app.main:app --reload
|
| 196 |
+
|
| 197 |
+
1. Get a free API Key at [https://example.com](https://example.com)
|
| 198 |
+
2. Clone the repo
|
| 199 |
+
```sh
|
| 200 |
+
git clone https://github.com/github_username/repo_name.git
|
| 201 |
+
```
|
| 202 |
+
3. Install NPM packages
|
| 203 |
+
```sh
|
| 204 |
+
npm install
|
| 205 |
+
```
|
| 206 |
+
4. Enter your API in `config.js`
|
| 207 |
+
```js
|
| 208 |
+
const API_KEY = 'ENTER YOUR API';
|
| 209 |
+
```
|
| 210 |
+
5. Change git remote url to avoid accidental pushes to base project
|
| 211 |
+
```sh
|
| 212 |
+
git remote set-url origin github_username/repo_name
|
| 213 |
+
git remote -v # confirm the changes
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
<!-- USAGE EXAMPLES -->
|
| 221 |
+
## Usage
|
| 222 |
+
|
| 223 |
+
Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
|
| 224 |
+
|
| 225 |
+
_For more examples, please refer to the [Documentation](https://example.com)_
|
| 226 |
+
|
| 227 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
<!-- ROADMAP -->
|
| 232 |
+
## Roadmap
|
| 233 |
+
|
| 234 |
+
- [ ] Feature 1
|
| 235 |
+
- [ ] Feature 2
|
| 236 |
+
- [ ] Feature 3
|
| 237 |
+
- [ ] Nested Feature
|
| 238 |
+
|
| 239 |
+
See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
|
| 240 |
+
|
| 241 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
<!-- CONTRIBUTING -->
|
| 246 |
+
## Contributing
|
| 247 |
+
|
| 248 |
+
Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
|
| 249 |
+
|
| 250 |
+
If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
|
| 251 |
+
Don't forget to give the project a star! Thanks again!
|
| 252 |
+
|
| 253 |
+
1. Fork the Project
|
| 254 |
+
2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
|
| 255 |
+
3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
|
| 256 |
+
4. Push to the Branch (`git push origin feature/AmazingFeature`)
|
| 257 |
+
5. Open a Pull Request
|
| 258 |
+
|
| 259 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 260 |
+
|
| 261 |
+
### Top contributors:
|
| 262 |
+
|
| 263 |
+
<a href="https://github.com/github_username/repo_name/graphs/contributors">
|
| 264 |
+
<img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
|
| 265 |
+
</a>
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
<!-- LICENSE -->
|
| 270 |
+
## License
|
| 271 |
+
|
| 272 |
+
Distributed under the project_license. See `LICENSE.txt` for more information.
|
| 273 |
+
|
| 274 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
<!-- CONTACT -->
|
| 279 |
+
## Contact
|
| 280 |
+
|
| 281 |
+
Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
|
| 282 |
+
|
| 283 |
+
Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
|
| 284 |
+
|
| 285 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
<!-- ACKNOWLEDGMENTS -->
|
| 290 |
+
## Acknowledgments
|
| 291 |
+
|
| 292 |
+
* []()
|
| 293 |
+
* []()
|
| 294 |
+
* []()
|
| 295 |
+
|
| 296 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
<!-- MARKDOWN LINKS & IMAGES -->
|
| 301 |
+
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 302 |
+
[contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
|
| 303 |
+
[contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
|
| 304 |
+
[forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
|
| 305 |
+
[forks-url]: https://github.com/github_username/repo_name/network/members
|
| 306 |
+
[stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
|
| 307 |
+
[stars-url]: https://github.com/github_username/repo_name/stargazers
|
| 308 |
+
[issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
|
| 309 |
+
[issues-url]: https://github.com/github_username/repo_name/issues
|
| 310 |
+
[license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
|
| 311 |
+
[license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
|
| 312 |
+
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 313 |
+
[linkedin-url]: https://linkedin.com/in/linkedin_username
|
| 314 |
+
[product-screenshot]: images/screenshot.png
|
| 315 |
+
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 316 |
+
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
| 317 |
+
[Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
|
| 318 |
+
[Next-url]: https://nextjs.org/
|
| 319 |
+
[React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
|
| 320 |
+
[React-url]: https://reactjs.org/
|
| 321 |
+
[Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
|
| 322 |
+
[Vue-url]: https://vuejs.org/
|
| 323 |
+
[Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
|
| 324 |
+
[Angular-url]: https://angular.io/
|
| 325 |
+
[Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
|
| 326 |
+
[Svelte-url]: https://svelte.dev/
|
| 327 |
+
[Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
|
| 328 |
+
[Laravel-url]: https://laravel.com
|
| 329 |
+
[Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
|
| 330 |
+
[Bootstrap-url]: https://getbootstrap.com
|
| 331 |
+
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 332 |
+
[JQuery-url]: https://jquery.com
|
| 333 |
+
<!-- TODO: -->
|
| 334 |
+
[](#)
|
| 335 |
+
[](#)
|
| 336 |
+
[](#)
|
| 337 |
+
[](#)
|
| 338 |
+
[](#)
|
| 339 |
+
[](#)
|
| 340 |
+
[](#)[text](../projet_04/.gitignore)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def greet(name):
|
| 4 |
+
return "Hello " + name + "!!"
|
| 5 |
+
|
| 6 |
+
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
| 7 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Déployer vers Hugging Face Spaces
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
deploy:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
steps:
|
| 13 |
+
- name: Checkout repository
|
| 14 |
+
uses: actions/checkout@v4
|
| 15 |
+
|
| 16 |
+
- name: Setup Python
|
| 17 |
+
uses: actions/setup-python@v5
|
| 18 |
+
with:
|
| 19 |
+
python-version: "3.10"
|
| 20 |
+
|
| 21 |
+
- name: Install dependencies
|
| 22 |
+
run: |
|
| 23 |
+
python -m pip install --upgrade pip
|
| 24 |
+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 25 |
+
|
| 26 |
+
- name: Push to Hugging Face Space
|
| 27 |
+
env:
|
| 28 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 29 |
+
run: |
|
| 30 |
+
git config --global user.email "actions@github.com"
|
| 31 |
+
git config --global user.name "GitHub Actions"
|
| 32 |
+
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 33 |
+
rsync -av --exclude '.git' ./ hf_space/
|
| 34 |
+
cd hf_space
|
| 35 |
+
git add .
|
| 36 |
+
git commit -m "đ Auto-deploy from GitHub Actions"
|
| 37 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.code-workspace
|
| 2 |
+
.venv/
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Projet 05
|
| 3 |
+
emoji: đ
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.49.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def greet(name):
|
| 4 |
+
return "Hello " + name + "!!"
|
| 5 |
+
|
| 6 |
+
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
| 7 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from app.main import greet
|
| 3 |
+
|
| 4 |
+
def test_greet_returns_string():
|
| 5 |
+
"""Vérifie que la fonction retourne bien une chaßne de caractÚres."""
|
| 6 |
+
result = greet("Alice")
|
| 7 |
+
assert isinstance(result, str), "Le rĂ©sultat doit ĂȘtre une chaĂźne de caractĂšres."
|
| 8 |
+
|
| 9 |
+
def test_greet_output_content():
|
| 10 |
+
"""Vérifie que la fonction génÚre la phrase attendue."""
|
| 11 |
+
result = greet("Bob")
|
| 12 |
+
assert result == "Hello Bob!!", f"Résultat inattendu : {result}"
|
| 13 |
+
|
| 14 |
+
def test_greet_with_empty_string():
|
| 15 |
+
"""VĂ©rifie le comportement si lâentrĂ©e est vide."""
|
| 16 |
+
result = greet("")
|
| 17 |
+
assert result == "Hello !!", "Le résultat doit gérer les entrées vides."
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[virtualenvs]
|
| 2 |
+
in-project = true
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from projet_05 import config # noqa: F401
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from loguru import logger
|
| 5 |
+
|
| 6 |
+
# Load environment variables from .env file if it exists
|
| 7 |
+
load_dotenv()
|
| 8 |
+
|
| 9 |
+
# Paths
|
| 10 |
+
PROJ_ROOT = Path(__file__).resolve().parents[1]
|
| 11 |
+
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
|
| 12 |
+
|
| 13 |
+
DATA_DIR = PROJ_ROOT / "data"
|
| 14 |
+
RAW_DATA_DIR = DATA_DIR / "raw"
|
| 15 |
+
INTERIM_DATA_DIR = DATA_DIR / "interim"
|
| 16 |
+
PROCESSED_DATA_DIR = DATA_DIR / "processed"
|
| 17 |
+
EXTERNAL_DATA_DIR = DATA_DIR / "external"
|
| 18 |
+
|
| 19 |
+
MODELS_DIR = PROJ_ROOT / "models"
|
| 20 |
+
|
| 21 |
+
REPORTS_DIR = PROJ_ROOT / "reports"
|
| 22 |
+
FIGURES_DIR = REPORTS_DIR / "figures"
|
| 23 |
+
|
| 24 |
+
# If tqdm is installed, configure loguru with tqdm.write
|
| 25 |
+
# https://github.com/Delgan/loguru/issues/135
|
| 26 |
+
try:
|
| 27 |
+
from tqdm import tqdm
|
| 28 |
+
|
| 29 |
+
logger.remove(0)
|
| 30 |
+
logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
|
| 31 |
+
except ModuleNotFoundError:
|
| 32 |
+
pass
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@app.command()
|
| 13 |
+
def main(
|
| 14 |
+
# ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
|
| 15 |
+
input_path: Path = RAW_DATA_DIR / "dataset.csv",
|
| 16 |
+
output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
|
| 17 |
+
# ----------------------------------------------
|
| 18 |
+
):
|
| 19 |
+
# ---- REPLACE THIS WITH YOUR OWN CODE ----
|
| 20 |
+
logger.info("Processing dataset...")
|
| 21 |
+
for i in tqdm(range(10), total=10):
|
| 22 |
+
if i == 5:
|
| 23 |
+
logger.info("Something happened for iteration 5.")
|
| 24 |
+
logger.success("Processing dataset complete.")
|
| 25 |
+
# -----------------------------------------
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
if __name__ == "__main__":
|
| 29 |
+
app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real feature engineering."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating features from dataset...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Features generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real inference code."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Performing inference for model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Inference complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "features.csv",
    labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real training code."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Training some model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Modeling training complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import FIGURES_DIR, PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = FIGURES_DIR / "plot.png",
    # -----------------------------------------
):
    """Cookiecutter placeholder command: swap the demo loop for real plotting code."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating plot from data...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Plot generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["flit_core >=3.2,<4"]
|
| 3 |
+
build-backend = "flit_core.buildapi"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "projet_05"
|
| 7 |
+
version = "0.0.1"
|
| 8 |
+
description = "D\u00e9ployez un mod\u00e8le de Machine Learning"
|
| 9 |
+
authors = [
|
| 10 |
+
{ name = "St\u00e9phane Manet" },
|
| 11 |
+
]
|
| 12 |
+
license = { file = "LICENSE" }
|
| 13 |
+
readme = "README.md"
|
| 14 |
+
classifiers = [
|
| 15 |
+
"Programming Language :: Python :: 3",
|
| 16 |
+
"License :: OSI Approved :: MIT License"
|
| 17 |
+
]
|
| 18 |
+
dependencies = [
|
| 19 |
+
"loguru",
|
| 20 |
+
"mkdocs",
|
| 21 |
+
"pip",
|
| 22 |
+
"pytest",
|
| 23 |
+
"python-dotenv",
|
| 24 |
+
"ruff",
|
| 25 |
+
"tqdm",
|
| 26 |
+
"typer",
|
| 27 |
+
"imbalanced-learn (>=0.14.0,<0.15.0)",
|
| 28 |
+
"scikit-learn (>=1.4.2,<2.0.0)",
|
| 29 |
+
"matplotlib (>=3.10.7,<4.0.0)",
|
| 30 |
+
"numpy (>=2.3.4,<3.0.0)",
|
| 31 |
+
"pandas (>=2.3.3,<3.0.0)",
|
| 32 |
+
"pyyaml (>=6.0.3,<7.0.0)",
|
| 33 |
+
"scipy (>=1.16.3,<2.0.0)",
|
| 34 |
+
"seaborn (>=0.13.2,<0.14.0)",
|
| 35 |
+
"shap (>=0.49.1,<0.50.0)",
|
| 36 |
+
"gradio (>=5.49.1,<6.0.0)",
|
| 37 |
+
"joblib (>=1.4.2,<2.0.0)"
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
requires-python = ">=3.11,<3.13"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
[tool.ruff]
|
| 44 |
+
line-length = 99
|
| 45 |
+
src = ["projet_05"]
|
| 46 |
+
include = ["pyproject.toml", "projet_05/**/*.py"]
|
| 47 |
+
|
| 48 |
+
[tool.ruff.lint]
|
| 49 |
+
extend-select = ["I"] # Add import sorting
|
| 50 |
+
|
| 51 |
+
[tool.ruff.lint.isort]
|
| 52 |
+
known-first-party = ["projet_05"]
|
| 53 |
+
force-sort-within-sections = true
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def test_code_is_tested():
    """Deliberately failing placeholder.

    Keeps CI red until real tests are written for the dataset, features
    and modeling steps. Replace this with actual assertions.
    """
    assert False
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py
CHANGED
|
@@ -1 +1,4 @@
|
|
| 1 |
from projet_05 import config # noqa: F401
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from projet_05 import config # noqa: F401
|
| 2 |
+
from projet_05.settings import Settings, load_settings # noqa: F401
|
| 3 |
+
|
| 4 |
+
__all__ = ["config", "Settings", "load_settings"]
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from functools import lru_cache
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Union
|
| 6 |
+
|
| 7 |
+
from scripts_projet04.brand.brand import ( # type: ignore[import-not-found]
|
| 8 |
+
Theme,
|
| 9 |
+
ThemeConfig,
|
| 10 |
+
configure_brand,
|
| 11 |
+
load_brand,
|
| 12 |
+
make_diverging_cmap,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
# Repository root: two levels up from this file (projet_05/branding.py).
ROOT_DIR = Path(__file__).resolve().parents[1]
# Brand definition reused from projet 04's scripts directory.
DEFAULT_BRAND_PATH = ROOT_DIR / "scripts_projet04" / "brand" / "brand.yml"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _resolve_path(path: Union[str, Path, None]) -> Path:
|
| 20 |
+
if path is None:
|
| 21 |
+
return DEFAULT_BRAND_PATH
|
| 22 |
+
return Path(path).expanduser().resolve()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@lru_cache(maxsize=1)
def load_brand_config(path: Union[str, Path, None] = None) -> ThemeConfig:
    """Load the brand YAML once and return the parsed ThemeConfig."""
    # maxsize=1: calling with a second, different path evicts the first
    # entry — presumably only one brand file is used per process (verify).
    cfg_path = _resolve_path(path)
    return load_brand(cfg_path)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@lru_cache(maxsize=1)
def apply_brand_theme(path: Union[str, Path, None] = None) -> ThemeConfig:
    """
    Apply the OpenClassrooms/TechNova brand theme globally.

    Returns the ThemeConfig so callers can inspect colors if needed.
    """
    # lru_cache(maxsize=1) makes this effectively run-once per path:
    # repeated calls with the same argument skip re-applying the theme.
    cfg_path = _resolve_path(path)
    cfg = configure_brand(cfg_path)
    # NOTE(review): Theme.apply() presumably mutates global plotting state
    # (it comes from scripts_projet04.brand) — confirm it is idempotent.
    Theme.apply()
    return cfg
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
__all__ = [
|
| 46 |
+
"Theme",
|
| 47 |
+
"ThemeConfig",
|
| 48 |
+
"apply_brand_theme",
|
| 49 |
+
"load_brand_config",
|
| 50 |
+
"make_diverging_cmap",
|
| 51 |
+
"DEFAULT_BRAND_PATH",
|
| 52 |
+
]
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py
CHANGED
|
@@ -1,28 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
|
|
|
|
|
|
| 3 |
from loguru import logger
|
| 4 |
-
from tqdm import tqdm
|
| 5 |
import typer
|
| 6 |
|
| 7 |
-
from projet_05.config import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@app.command()
|
| 13 |
def main(
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
):
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
logger.success("Processing dataset complete.")
|
| 25 |
-
# -----------------------------------------
|
| 26 |
|
| 27 |
|
| 28 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sqlite3
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
from loguru import logger
|
|
|
|
| 9 |
import typer
|
| 10 |
|
| 11 |
+
from projet_05.config import INTERIM_DATA_DIR
|
| 12 |
+
from projet_05.settings import Settings, load_settings
|
| 13 |
+
|
| 14 |
+
app = typer.Typer(help="Préparation et fusion des données sources.")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ---------------------------------------------------------------------------
|
| 18 |
+
# Utilitaires
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
def safe_read_csv(path: Path, *, dtype=None) -> pd.DataFrame:
    """Read *path* as CSV; on any failure, log it and return an empty frame."""
    try:
        logger.info("Lecture du fichier {}", path)
        return pd.read_csv(path, dtype=dtype)
    except FileNotFoundError:
        logger.warning("Fichier absent: {}", path)
    except Exception as exc:  # pragma: no cover - log + empty dataframe
        logger.error("Impossible de lire {} ({})", path, exc)
    # Shared fallback for both error branches.
    return pd.DataFrame()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def clean_text_values(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize textual values that commonly encode missing data.

    Known "unknown" tokens are mapped to NaN, then every object column is
    converted to the pandas ``string`` dtype and stripped of surrounding
    whitespace. The input frame is not modified.
    """
    replace_tokens = [
        "",
        " ",
        "  ",
        "   ",
        "nan",
        "NaN",
        "NAN",
        "None",
        "JE ne sais pas",
        "je ne sais pas",
        "Je ne sais pas",
        "Unknow",
        "Unknown",
        "non pertinent",
        "Non pertinent",
        "NON PERTINENT",
    ]
    normalized = df.copy().replace(replace_tokens, np.nan)

    for col in normalized.select_dtypes(include="object").columns:
        cleaned = normalized[col].replace(replace_tokens, np.nan)
        normalized[col] = cleaned.astype("string").str.strip()
    return normalized
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _harmonize_id_column(df: pd.DataFrame, column: str, *, digits_only: bool = True) -> pd.DataFrame:
|
| 64 |
+
data = df.copy()
|
| 65 |
+
if column not in data.columns:
|
| 66 |
+
return data
|
| 67 |
+
|
| 68 |
+
if digits_only:
|
| 69 |
+
extracted = data[column].astype(str).str.extract(r"(\\d+)")
|
| 70 |
+
data[column] = pd.to_numeric(extracted[0], errors="coerce")
|
| 71 |
+
data[column] = pd.to_numeric(data[column], errors="coerce").astype("Int64")
|
| 72 |
+
return data
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _rename_column(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame:
|
| 76 |
+
if source not in df.columns:
|
| 77 |
+
return df
|
| 78 |
+
return df.rename(columns={source: target})
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _log_id_diagnostics(df: pd.DataFrame, *, name: str, col_id: str) -> None:
    """Log row count, unique-id count and duplicate count for one source."""
    if col_id not in df.columns:
        logger.warning("La colonne {} est absente du fichier {}.", col_id, name)
        return
    n_rows = len(df)
    n_unique = df[col_id].nunique(dropna=True)
    logger.info(
        "{name}: {total} lignes | {uniques} identifiants uniques | {duplicates} doublons",
        name=name,
        total=n_rows,
        uniques=n_unique,
        duplicates=n_rows - n_unique,
    )
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _persist_sql_trace(df_dict: dict[str, pd.DataFrame], settings: Settings) -> pd.DataFrame:
|
| 98 |
+
"""
|
| 99 |
+
Reproduire la fusion SQL décrite dans le notebook.
|
| 100 |
|
| 101 |
+
Chaque DataFrame est stocké dans une base SQLite éphémÚre pour
|
| 102 |
+
conserver une traçabilitĂ© de la requĂȘte exĂ©cutĂ©e.
|
| 103 |
+
"""
|
| 104 |
+
db_path = settings.db_file
|
| 105 |
+
sql_path = settings.sql_file
|
| 106 |
|
| 107 |
+
db_path.parent.mkdir(parents=True, exist_ok=True)
|
| 108 |
+
sql_path.parent.mkdir(parents=True, exist_ok=True)
|
| 109 |
|
| 110 |
+
if db_path.exists():
|
| 111 |
+
db_path.unlink()
|
| 112 |
+
|
| 113 |
+
query = f"""
|
| 114 |
+
SELECT *
|
| 115 |
+
FROM sirh
|
| 116 |
+
INNER JOIN evaluation USING ({settings.col_id})
|
| 117 |
+
INNER JOIN sond USING ({settings.col_id});
|
| 118 |
+
""".strip()
|
| 119 |
+
|
| 120 |
+
with db_path.open("wb") as _:
|
| 121 |
+
pass # just ensure the file exists for sqlite on some platforms
|
| 122 |
+
|
| 123 |
+
with sqlite3.connect(db_path) as conn:
|
| 124 |
+
for name, frame in df_dict.items():
|
| 125 |
+
frame.to_sql(name, conn, index=False, if_exists="replace")
|
| 126 |
+
merged = pd.read_sql_query(query, conn)
|
| 127 |
+
|
| 128 |
+
sql_path.write_text(query, encoding="utf-8")
|
| 129 |
+
return merged
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def build_dataset(settings: Settings) -> pd.DataFrame:
    """Load, clean, harmonize and merge the three raw sources.

    Each source (SIRH, evaluations, survey) is read defensively, its key
    column renamed/coerced to the shared id, then the three frames are
    merged through the SQL trace helper.

    Raises KeyError when the join key is missing from the merged result.
    """
    # SIRH export: id column is already named settings.col_id.
    sirh = clean_text_values(
        safe_read_csv(settings.path_sirh).pipe(
            _harmonize_id_column, settings.col_id, digits_only=True
        )
    )
    # Evaluation export: key arrives as "eval_number".
    evaluation = clean_text_values(
        safe_read_csv(settings.path_eval)
        .pipe(_rename_column, "eval_number", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )
    # Survey export: key arrives as "code_sondage".
    sond = clean_text_values(
        safe_read_csv(settings.path_sondage)
        .pipe(_rename_column, "code_sondage", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )

    # Log per-source row / unique-id / duplicate counts before merging.
    for name, frame in {"sirh": sirh, "evaluation": evaluation, "sond": sond}.items():
        _log_id_diagnostics(frame, name=name, col_id=settings.col_id)

    frames = {
        "sirh": sirh,
        "evaluation": evaluation,
        "sond": sond,
    }
    merged = _persist_sql_trace(frames, settings)

    # Sanity check: the join key must survive the SQL merge.
    missing_cols = [settings.col_id] if settings.col_id not in merged.columns else []
    if missing_cols:
        raise KeyError(
            f"La colonne {settings.col_id} est absente de la fusion finale. "
            "Vérifiez vos fichiers sources."
        )

    logger.success("Fusion réalisée: {} lignes / {} colonnes", *merged.shape)
    return merged
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def save_dataset(df: pd.DataFrame, output_path: Path) -> None:
    """Write *df* to CSV at *output_path*, creating parent directories as needed."""
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Fichier fusionné sauvegardé dans {}", output_path)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ---------------------------------------------------------------------------
|
| 178 |
+
# CLI
|
| 179 |
+
# ---------------------------------------------------------------------------
|
| 180 |
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin vers un fichier settings.yml personnalisé.",
    ),
    output_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--output",
        "-o",
        help="Chemin de sortie du dataset fusionné.",
    ),
):
    """Typer entry point reproducing the raw-data merge.

    Loads settings (optionally from a custom settings.yml), builds the
    merged dataset from the three raw sources and writes it to CSV.
    """
    # Truthiness check: an unset --settings option falls back to defaults.
    settings = load_settings(settings_path) if settings_path else load_settings()
    df = build_dataset(settings)
    save_dataset(df, output_path)
|
|
|
|
|
|
|
| 200 |
|
| 201 |
|
| 202 |
if __name__ == "__main__":
|