GitHub Actions committed on
Commit
4a7ca9a
·
1 Parent(s): 77f2ae3

🚀 Auto-deploy from GitHub Actions

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +157 -323
  2. hf_space/hf_space/hf_space/hf_space/Makefile +3 -1
  3. hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +1 -0
  4. hf_space/hf_space/hf_space/hf_space/hf_space/README.md +66 -1
  5. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +547 -17
  6. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -3
  7. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/static.yml +37 -0
  8. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +5 -12
  9. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +13 -9
  10. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +35 -1
  11. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +5 -0
  12. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +7 -27
  13. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +2 -2
  14. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +34 -18
  15. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE +10 -0
  16. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile +85 -0
  17. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +178 -4
  18. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -4
  19. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +191 -1
  20. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +328 -0
  21. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +0 -0
  22. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +7 -0
  23. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +37 -0
  24. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
  25. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +2 -0
  26. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
  27. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +7 -0
  28. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py +17 -0
  29. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep +0 -0
  30. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  31. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock +0 -0
  32. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml +2 -0
  33. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +1 -0
  34. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py +32 -0
  35. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +29 -0
  36. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py +29 -0
  37. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py +0 -0
  38. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +30 -0
  39. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +30 -0
  40. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py +29 -0
  41. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +53 -0
  42. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep +0 -0
  43. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep +0 -0
  44. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep +0 -0
  45. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py +5 -0
  46. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  47. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +3 -0
  48. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py +52 -0
  49. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +188 -14
  50. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/explainability.py +102 -0
README.md CHANGED
@@ -11,395 +11,229 @@ short_description: Projet 05 formation Openclassrooms
11
  python_version: 3.11
12
  ---
13
 
14
- # projet_05 : Déployez un modèle de Machine Learning
15
 
16
- <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
17
- <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
18
- </a>
19
-
20
- [![mkdocs-shield]][mkdocs-url]
21
-
22
-
23
- ## Organisation du projet
24
-
25
- ```
26
- ├── LICENSE <- Open-source license if one is chosen
27
- ├── Makefile <- Makefile with convenience commands like `make data` or `make train`
28
- ├── README.md <- The top-level README for developers using this project.
29
- ├── data
30
- │ ├── external <- Data from third party sources.
31
- │ ├── interim <- Intermediate data that has been transformed.
32
- │ ├── processed <- The final, canonical data sets for modeling.
33
- │ └── raw <- The original, immutable data dump.
34
-
35
- ├── docs <- A default mkdocs project; see www.mkdocs.org for details
36
-
37
- ├── models <- Trained and serialized models, model predictions, or model summaries
38
-
39
- ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
40
- │ the creator's initials, and a short `-` delimited description, e.g.
41
- │ `1.0-jqp-initial-data-exploration`.
42
-
43
- ├── pyproject.toml <- Project configuration file with package metadata for
44
- │ projet_05 and configuration for tools like black
45
-
46
- ├── references <- Data dictionaries, manuals, and all other explanatory materials.
47
-
48
- ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
49
- │ └── figures <- Generated graphics and figures to be used in reporting
50
-
51
- ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
52
- │ generated with `pip freeze > requirements.txt`
53
-
54
- ├── setup.cfg <- Configuration file for flake8
55
-
56
- └── projet_05 <- Source code for use in this project.
57
-
58
- ├── __init__.py <- Makes projet_05 a Python module
59
-
60
- ├── config.py <- Store useful variables and configuration
61
-
62
- ├── dataset.py <- Scripts to download or generate data
63
-
64
- ├── features.py <- Code to create features for modeling
65
-
66
- ├── modeling
67
- │ ├── __init__.py
68
- │ ├── predict.py <- Code to run model inference with trained models
69
- │ └── train.py <- Code to train models
70
-
71
- └── plots.py <- Code to create visualizations
72
- ```
73
-
74
- ## Code hérité réutilisé
75
-
76
- - `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
77
- - `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
78
-
79
- ## Base de données PostgreSQL
80
-
81
- Depuis la branche `postgresql`, toute la fusion des fichiers bruts repose sur une base PostgreSQL accessible via SQLAlchemy.
82
-
83
- 1. Installez PostgreSQL (Homebrew, package officiel, etc.).
84
- 2. Créez un rôle et la base attendue :
85
-
86
- > Exemple pour MacOS
87
-
88
- ```bash
89
- /opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
90
- /opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
91
- /opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
92
- ```
93
-
94
- Adaptez les chemins/versions selon votre environnement.
95
- 3. Renseignez la chaîne de connexion dans `projet_05/settings.yml` :
96
-
97
- ```yaml
98
- database:
99
- url: postgresql+psycopg://user:password@host:5432/projet05
100
- schema: public
101
- ```
102
-
103
- Il est également possible de définir `PROJET05_DATABASE_URL` dans l'environnement.
104
-
105
- 4. Initialisez la base (création des tables + insertion des CSV d'exemple) avec :
106
-
107
- ```bash
108
- python -m scripts.init_db
109
- ```
110
-
111
- 5. Assurez-vous que l'utilisateur possède les droits `CREATE/DROP TABLE` dans le schéma ciblé : les tables `sirh`, `evaluation`, `sond` ainsi que `prediction_logs` seront créées ou recréées à chaque ré-exécution.
112
-
113
- 6. Lancez ensuite `python -m projet_05.dataset` comme auparavant (ou `python main.py` pour exécuter toutes les étapes). La requête SQL utilisée est toujours exportée dans `reports/merge_sql.sql` pour audit.
114
-
115
- > Les interactions utilisateur/modèle (qu'elles proviennent du formulaire, du tableau ou d'un upload) sont automatiquement journalisées dans la table `prediction_logs`, ce qui permet de tracer les usages et de constituer un dataset réel pour le monitoring.
116
-
117
- ## Tests & couverture
118
-
119
- Une batterie de tests Pytest valident l’intégrité de la base PostgreSQL, la fusion des données et la journalisation des prédictions.
120
-
121
- 1. Démarrez PostgreSQL (cf. section précédente) et créez un utilisateur ayant les droits `CREATE/DROP DATABASE`.
122
- 2. Facultatif : définissez `PROJET05_TEST_DATABASE_URL` si vous souhaitez utiliser une URL différente de `postgresql+psycopg://postgres:postgres@localhost:5432/projet05_test`.
123
- 3. Exécutez les tests et générez le rapport de couverture :
124
-
125
- ```bash
126
- pytest
127
- ```
128
 
129
- La configuration Pytest produit à la fois un rapport terminal (`--cov-report=term-missing`) et un fichier `coverage.xml` exploitable par vos outils CI/CD.
130
- Les sorties complètes sont sauvegardées dans `logs/tests_logs/<timestamp>.log`.
131
 
132
- Les tests vérifient notamment :
133
 
134
- - la création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` et la cohérence du nombre de lignes insérées ;
135
- - l’intégrité du DataFrame fusionné (typage, absence de valeurs nulles sur la clé primaire, cohérence de la cible) ;
136
- - la robustesse du script de log des prédictions (insertion d’entrées dans `prediction_logs` et nettoyage) ;
137
- - la génération des logs de pipeline, regroupés dans `logs/pipeline_logs/<timestamp>.log`.
138
 
139
- --------
140
 
141
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
142
 
143
- <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
144
- <a id="readme-top"></a>
145
- <!--
146
- *** Thanks for checking out the Best-README-Template. If you have a suggestion
147
- *** that would make this better, please fork the repo and create a pull request
148
- *** or simply open an issue with the tag "enhancement".
149
- *** Don't forget to give the project a star!
150
- *** Thanks again! Now go create something AMAZING! :D
151
- -->
152
 
153
- <!-- PROJECT SHIELDS -->
154
- <!--
155
- *** I'm using markdown "reference style" links for readability.
156
- *** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
157
- *** See the bottom of this document for the declaration of the reference variables
158
- *** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
159
- *** https://www.markdownguide.org/basic-syntax/#reference-style-links
160
- -->
161
- [![Contributors][contributors-shield]][contributors-url]
162
- [![Python][python]][python]
163
- [![Forks][forks-shield]][forks-url]
164
- [![Stargazers][stars-shield]][stars-url]
165
- [![Issues][issues-shield]][issues-url]
166
- [![project_license][license-shield]][license-url]
167
- [![LinkedIn][linkedin-shield]][linkedin-url]
168
- ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml)
169
 
170
- <!-- PROJECT LOGO -->
171
- <br />
172
- <div align="center">
173
- <a href="https://github.com/github_username/repo_name">
174
- <img src="images/logo.png" alt="Logo" width="80" height="80">
175
- </a>
176
-
177
- <h3 align="center">project_title</h3>
178
-
179
- <p align="center">
180
- project_description
181
- <br />
182
- <a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
183
- <br />
184
- <br />
185
- <a href="https://github.com/github_username/repo_name">View Demo</a>
186
- &middot;
187
- <a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
188
- &middot;
189
- <a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
190
- </p>
191
- </div>
192
-
193
- <!-- TABLE OF CONTENTS -->
194
- <details>
195
- <summary>Table of Contents</summary>
196
- <ol>
197
- <li>
198
- <a href="#about-the-project">About The Project</a>
199
- <ul>
200
- <li><a href="#built-with">Built With</a></li>
201
- </ul>
202
- </li>
203
- <li>
204
- <a href="#getting-started">Getting Started</a>
205
- <ul>
206
- <li><a href="#prerequisites">Prerequisites</a></li>
207
- <li><a href="#installation">Installation</a></li>
208
- </ul>
209
- </li>
210
- <li><a href="#usage">Usage</a></li>
211
- <li><a href="#roadmap">Roadmap</a></li>
212
- <li><a href="#contributing">Contributing</a></li>
213
- <li><a href="#license">License</a></li>
214
- <li><a href="#contact">Contact</a></li>
215
- <li><a href="#acknowledgments">Acknowledgments</a></li>
216
- </ol>
217
- </details>
218
 
 
219
 
 
220
 
221
- <!-- ABOUT THE PROJECT -->
222
- ## About The Project
223
 
224
- [![Product Name Screen Shot][product-screenshot]](https://example.com)
 
 
225
 
226
- Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
227
 
228
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
 
 
 
 
 
229
 
 
230
 
 
 
 
 
 
231
 
232
- ### Built With
233
 
234
- * [![Python][Python]][Python-url]
235
- * [![SQL][SQL]][SQL-url]
 
 
 
236
 
237
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
238
 
239
- <!-- GETTING STARTED -->
240
- ## Getting Started
241
 
242
- This is an example of how you may give instructions on setting up your project locally.
243
- To get a local copy up and running follow these simple example steps.
244
 
245
- ### Prerequisites
246
 
247
- This is an example of how to list things you need to use the software and how to install them.
248
- * npm
249
- ```sh
250
- npm install npm@latest -g
251
- ```
252
 
253
- ### Installation
254
 
255
- pip install -r requirements.txt
256
- uvicorn app.main:app --reload
257
 
258
- 1. Clone the repo
259
- ```sh
260
- git clone https://github.com/stephmnt/OCR_Projet05.git
261
- ```
262
- 2. Install NPM packages
263
- ```sh
264
- npm install
265
- ```
266
- 3. Enter your API in `config.js`
267
- ```js
268
- const API_KEY = 'ENTER YOUR API';
269
- ```
270
- 4. Change git remote url to avoid accidental pushes to base project
271
- ```sh
272
- git remote set-url origin github_username/repo_name
273
- git remote -v # confirm the changes
274
- ```
275
 
276
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
 
 
 
 
277
 
 
278
 
 
 
 
279
 
280
- <!-- USAGE EXAMPLES -->
281
- ## Usage
282
 
283
- Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
 
 
 
 
284
 
285
- _For more examples, please refer to the [Documentation](https://example.com)_
286
 
287
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
288
 
 
289
 
 
290
 
291
- <!-- ROADMAP -->
292
- ## Roadmap
293
 
294
- - [ ] Feature 1
295
- - [ ] Feature 2
296
- - [ ] Feature 3
297
- - [ ] Nested Feature
298
 
299
- See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
300
 
301
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
302
 
 
303
 
 
304
 
305
- <!-- CONTRIBUTING -->
306
- ## Contributing
307
 
308
- Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
 
 
309
 
310
- If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
311
- Don't forget to give the project a star! Thanks again!
312
 
313
- 1. Fork the Project
314
- 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
315
- 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
316
- 4. Push to the Branch (`git push origin feature/AmazingFeature`)
317
- 5. Open a Pull Request
318
 
319
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
 
 
 
 
320
 
321
- ### Top contributors:
322
 
323
- <a href="https://github.com/github_username/repo_name/graphs/contributors">
324
- <img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
325
- </a>
326
 
 
 
 
327
 
 
328
 
329
- <!-- LICENSE -->
330
- ## License
331
 
332
- Distributed under the project_license. See `LICENSE.txt` for more information.
333
 
334
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
 
335
 
 
 
336
 
 
337
 
338
- <!-- CONTACT -->
339
- ## Contact
340
 
341
- Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
342
 
343
- Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
344
 
345
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
 
 
 
 
 
 
 
 
 
 
346
 
 
347
 
 
348
 
349
- <!-- ACKNOWLEDGMENTS -->
350
- ## Acknowledgments
351
 
352
- * []()
353
- * []()
354
- * []()
 
355
 
356
- <p align="right">(<a href="#readme-top">back to top</a>)</p>
357
 
 
 
 
 
 
 
 
358
 
 
359
 
360
- <!-- MARKDOWN LINKS & IMAGES -->
361
- <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
362
- [contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
363
- [contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
364
- [forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
365
- [forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
366
- [stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
367
- [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
368
- [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
369
- [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
370
- [product-screenshot]: images/screenshot.png
371
- [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
372
- <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
373
- [Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
374
- [Next-url]: https://nextjs.org/
375
- [React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
376
- [React-url]: https://reactjs.org/
377
- [Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
378
- [Vue-url]: https://vuejs.org/
379
- [Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
380
- [Angular-url]: https://angular.io/
381
- [Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
382
- [Svelte-url]: https://svelte.dev/
383
- [Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
384
- [Laravel-url]: https://laravel.com
385
- [Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
386
- [Bootstrap-url]: https://getbootstrap.com
387
- [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
388
- [JQuery-url]: https://jquery.com
389
- <!-- OK -->
390
- [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
391
- [license-url]: https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE
392
- [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
393
- [linkedin-url]: https://linkedin.com/in/stephanemanet
394
- <!-- TODO: -->
395
- [postgres-shield]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
396
- [python-shield]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
397
- [mkdocs-shield]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
398
- [mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
399
- [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
400
- [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
401
 
402
- ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml)
403
- [![https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff]][[mkdocs-url](https://stephmnt.github.io/OCR_Projet05/)]
404
- ![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/OCR_Projet05?display_date=published_at&style=flat-square)
405
- ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml)
 
11
  python_version: 3.11
12
  ---
13
 
14
+ # OCR Projet 05 — Prédiction d’attrition
15
 
16
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml)
17
+ ![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/OCR_Projet05?display_date=published_at&style=flat-square)
18
+ [![project_license](https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge)](https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE)
19
+ [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](https://stephmnt.github.io/OCR_Projet05/)
20
+ [![Cookie Cutter](https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter)](https://cookiecutter-data-science.drivendata.org/)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ Ce dépôt contient le projet OCR_Projet05. Il s’agit d’une application Gradio déployable sur Hugging Face Spaces, alimentée par un pipeline de préparation de données, un entraînement automatique et des services d’inférence orientés RH (prédiction de départ d’employés).
 
23
 
24
+ Ce document décrit :
25
 
26
+ - la **présentation fonctionnelle** ;
27
+ - les **instructions d’installation, d’utilisation et de déploiement** (local + Hugging Face);
28
+ - le **processus de stockage/gestion des données** (PostgreSQL + journaux) ;
29
+ - les **besoins analytiques** (tableaux de bord, métriques clés).
30
 
31
+ ---
32
 
33
+ ## 1. Vue d’ensemble du projet
34
 
35
+ - **Objectif métier** : détecter les employés à risque de départ en exploitant 3 sources brutes (SIRH, évaluation, sondage interne).
36
+ - **Technologie** : pipeline Python (Typer, pandas, scikit-learn, SQLAlchemy) + application Gradio (`app.py`) déployée sur Hugging Face.
37
+ - **Modèle** : pipeline scikit-learn (prétraitement + classifieur) sérialisé dans `models/best_model.joblib`, paramétré avec un seuil de décision optimisé (visible dans l’UI).
38
+ - **Journaux** : deux sous-dossiers `logs/pipeline_logs` et `logs/tests_logs` contiennent respectivement les traces du pipeline `main.py` et les sorties Pytest.
 
 
 
 
 
39
 
40
+ Arborescence clé :
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ ```
43
+ ├── projet_05/ # Package Python principal
44
+ ├── app.py # Interface Gradio (déploiement HF)
45
+ ├── scripts/init_db.py # Création/initialisation PostgreSQL
46
+ ├── main.py # Orchestrateur du pipeline local
47
+ ├── docs/ # Documentation MkDocs + tests.md
48
+ ├── tests/ # Suite Pytest (DB + intégration)
49
+ └── requirements.txt # Dépendances runtime (HF)
50
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ ---
53
 
54
+ ## 2. Installation locale
55
 
56
+ ### 2.1. Prérequis
 
57
 
58
+ 1. Python 3.11 (virtualenv ou Poetry recommandé).
59
+ 2. PostgreSQL (>= 17) accessible localement (cf. instructions DB plus bas).
60
+ 3. Outils optionnels : `make`, `pip`, `pytest`.
61
 
62
+ ### 2.2. Étapes
63
 
64
+ ```bash
65
+ git clone https://github.com/stephmnt/OCR_Projet05.git
66
+ cd OCR_Projet05
67
+ python -m venv .venv && source .venv/bin/activate
68
+ pip install -r requirements.txt # pour HF
69
+ pip install -e . # pour le développement local (pyproject)
70
+ ```
71
 
72
+ ### 2.3. Configuration PostgreSQL
73
 
74
+ ```bash
75
+ /opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
76
+ /opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
77
+ /opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
78
+ ```
79
 
80
+ Puis dans `projet_05/settings.yml` :
81
 
82
+ ```yaml
83
+ database:
84
+ url: postgresql+psycopg://postgres:postgres@localhost:5432/projet05
85
+ schema: public
86
+ ```
87
 
88
+ > Sur une autre infrastructure, adaptez l’URL ou utilisez `PROJET05_DATABASE_URL`.
89
 
90
+ ---
 
91
 
92
+ ## 3. Utilisation du pipeline
 
93
 
94
+ ### 3.1. Initialiser la base
95
 
96
+ ```bash
97
+ python -m scripts.init_db
98
+ ```
 
 
99
 
100
+ Création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` + insertion des CSV bruts situés dans `data/raw`.
101
 
102
+ ### 3.2. Pipeline complet
 
103
 
104
+ ```bash
105
+ python main.py
106
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ - `main.py` enchaîne :
109
+ 1. Initialisation base PostgreSQL (`scripts.init_db`)
110
+ 2. Préparation des données (`projet_05.dataset`)
111
+ 3. Feature engineering (`projet_05.features`)
112
+ 4. Entraînement (`projet_05.modeling.train`)
113
+ - Les logs sont regroupés dans `logs/pipeline_logs/<timestamp>.log`.
114
 
115
+ ### 3.3. Application Gradio locale
116
 
117
+ ```bash
118
+ python app.py
119
+ ```
120
 
121
+ L’interface propose :
 
122
 
123
+ - onglet **Formulaire** ;
124
+ - **Tableau interactif** ;
125
+ - **Upload CSV** ;
126
+ - **Fichiers non-mergés** (chargement des trois CSV bruts, fusion automatique).
127
+ Un appel à `_log_predictions` trace chaque prédiction dans PostgreSQL (`prediction_logs`).
128
 
129
+ ---
130
 
131
+ ## 4. Déploiement sur Hugging Face
132
 
133
+ ### 4.1. Dépendances
134
 
135
+ `requirements.txt` contient toutes les bibliothèques nécessaires à la Space (Gradio, scikit-learn, pandas, SQLAlchemy, psycopg…).
136
 
137
+ ### 4.2. Étapes
 
138
 
139
+ 1. Créer une Space Gradio (Python 3.11).
140
+ 2. Copier `app.py`, `requirements.txt`, `models/`, `data/processed/schema.json`.
141
+ 3. Configurer les secrets HF (si besoin de variables d’environnement).
142
+ 4. Optionnel : définir `HUGGINGFACEHUB_API_TOKEN` pour automatiser les déploiements via GitHub Actions.
143
 
144
+ ### 4.3. Spécificités Space
145
 
146
+ - Hugging Face n’expose pas PostgreSQL. L’application Gradio bascule alors sur le mode **pandas fallback** (fusion locale) grâce à la gestion d’erreur de `dataset.py`.
147
+ - Les journaux restants sont ceux générés par l’appli (pas d’écriture dans `logs/` côté Space).
148
 
149
+ ---
150
 
151
+ ## 5. Processus de stockage & gestion des données
152
 
153
+ ### 5.1. Sources
 
154
 
155
+ - `data/raw/extrait_sirh.csv`
156
+ - `data/raw/extrait_eval.csv`
157
+ - `data/raw/extrait_sondage.csv`
158
 
159
+ ### 5.2. Base relationnelle
 
160
 
161
+ Tables PostgreSQL créées par `scripts.init_db` :
 
 
 
 
162
 
163
+ | Table | Rôle | Colonnes clés |
164
+ | --- | --- | --- |
165
+ | `sirh` | Profil RH structuré | `id_employee`, `age`, `revenu_mensuel`, `poste`, etc. |
166
+ | `evaluation` | Historique d’évaluations | `id_employee`, `note_evaluation_actuelle`, `niveau_hierarchique_poste`, `satisfaction_*` |
167
+ | `sond` | Sondage + cible | `id_employee`, `a_quitte_l_entreprise`, `distance_domicile_travail`, `domaine_etude`, etc. |
168
+ | `prediction_logs` | Journal d’inférence | `log_id`, `created_at`, `id_employee`, `source`, `probability`, `decision`, `payload` JSON |
169
 
170
+ `projet_05.dataset` fusionne `sirh ∩ evaluation ∩ sond` via SQL ; en cas d’indisponibilité DB, la fusion pandas est utilisée en repli.
171
 
172
+ ### 5.3. Journaux et tracing
 
 
173
 
174
+ - `logs/pipeline_logs` : sorties `main.py`
175
+ - `logs/tests_logs` : sorties Pytest (`make test`)
176
+ - `prediction_logs` : base PostgreSQL, indispensable pour l’audit des décisions ML.
177
 
178
+ ---
179
 
180
+ ## 6. Tests et couverture
 
181
 
182
+ ### 6.1. Exécution
183
 
184
+ ```bash
185
+ pytest
186
+ ```
187
 
188
+ - La fixture `initialized_db` crée une base `projet05_test`, lance `scripts.init_db`, puis la supprime.
189
+ - Les logs Pytest sont stockés dans `logs/tests_logs/<timestamp>.log`.
190
 
191
+ ### 6.2. Couverture
192
 
193
+ - Rapports `term-missing` + `coverage.xml`.
194
+ - Zones non couvertes : `features.py`, `modeling/train.py`, `explainability.py` (à prioriser si besoin).
195
 
196
+ ---
197
 
198
+ ## 7. Besoins analytiques / tableaux de bord
199
 
200
+ - **Dashboard RH** basé sur les journaux `prediction_logs` :
201
+ - Volume de prédictions par source (Formulaire / CSV / Raw).
202
+ - Répartition des scores (`proba_depart`) / seuil de décision.
203
+ - Historique des décisions (tendance du taux de risque).
204
+ - Drill-down par attributs (`departement`, `poste`, `genre`…).
205
+ - **Monitoring modèle** :
206
+ - Taux d���utilisation (logs journaliers).
207
+ - Drift potentiel : comparer les distributions des features avec `docs/` (notebooks d’analyse) ou via un outil externe.
208
+ - **KPI Data/IT** :
209
+ - Latence d’inférence (calculable via timestamps, si ajoutés).
210
+ - Suivi des erreurs (logs pipeline/tests).
211
 
212
+ ---
213
 
214
+ ## 8. Choix techniques et justification
215
 
216
+ Ce projet combine une interface Gradio, une base PostgreSQL et un pipeline CI/CD GitHub Actions. Les décisions d’architecture détaillant le pourquoi/du comment (Gradio vs FastAPI, choix de PostgreSQL, automatisations) sont regroupées dans [`docs/docs/choix-techniques.md`](docs/docs/choix-techniques.md). Cette section sert de support de soutenance pour rappeler :
 
217
 
218
+ - pourquoi Gradio a été privilégié pour la démonstration Hugging Face ;
219
+ - comment PostgreSQL sécurise la fusion des trois sources et la journalisation ;
220
+ - en quoi les workflows GitHub Actions garantissent un déploiement fiable.
221
+ - comment les environnements sont configurés : `main.py` est exécuté en environnement **test** (base `projet05_test`, variables `PROJET05_TEST_DATABASE_URL`) pour valider le pipeline complet, tandis que `app.py` tourne en **production** (Space Hugging Face, variable `PROJET05_DATABASE_URL`/fallback pandas) afin de servir les utilisateurs finaux.
222
 
223
+ ## 9. Instructions rapides
224
 
225
+ | Action | Commande |
226
+ | --- | --- |
227
+ | Init DB + pipeline complet | `python main.py` |
228
+ | Lancer Gradio local | `python app.py` |
229
+ | Initialiser la base seule | `python -m scripts.init_db` |
230
+ | Lancer les tests + logs | `make test` |
231
+ | Déployer sur Hugging Face | Pousser `app.py`, `requirements.txt`, `models/`, config Space |
232
 
233
+ ---
234
 
235
+ ## 10. Licence / références
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
+ Ce projet est fourni dans le cadre de la formation OpenClassrooms.
238
+ La documentation complémentaire est disponible dans `docs/` (MkDocs + `docs/docs/tests.md` pour les tests).
239
+ Pour toute question : [LinkedIn](https://linkedin.com/in/stephanemanet).
 
hf_space/hf_space/hf_space/hf_space/Makefile CHANGED
@@ -43,7 +43,9 @@ format:
43
  ## Run tests
44
  .PHONY: test
45
  test:
46
- python -m pytest tests
 
 
47
 
48
 
49
  ## Set up Python interpreter environment
 
43
  ## Run tests
44
  .PHONY: test
45
  test:
46
+ @mkdir -p logs/tests_logs
47
+ @timestamp=$$(date +%Y%m%d_%H%M%S); \
48
+ pytest | tee logs/tests_logs/$$timestamp.log
49
 
50
 
51
  ## Set up Python interpreter environment
hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore CHANGED
@@ -7,6 +7,7 @@ questions.md
7
  /reports/
8
  /data/
9
  runtime.txt
 
10
 
11
  # vim
12
  *.swp
 
7
  /reports/
8
  /data/
9
  runtime.txt
10
+ /logs/
11
 
12
  # vim
13
  *.swp
hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -76,6 +76,66 @@ python_version: 3.11
76
  - `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
77
  - `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  --------
80
 
81
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
@@ -105,7 +165,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
105
  [![Issues][issues-shield]][issues-url]
106
  [![project_license][license-shield]][license-url]
107
  [![LinkedIn][linkedin-shield]][linkedin-url]
108
- ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
109
 
110
  <!-- PROJECT LOGO -->
111
  <br />
@@ -338,3 +398,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
338
  [mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
339
  [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
340
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
 
 
 
 
 
 
76
  - `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
77
  - `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
78
 
79
+ ## Base de données PostgreSQL
80
+
81
+ Depuis la branche `postgresql`, toute la fusion des fichiers bruts repose sur une base PostgreSQL accessible via SQLAlchemy.
82
+
83
+ 1. Installez PostgreSQL (Homebrew, package officiel, etc.).
84
+ 2. Créez un rôle et la base attendue :
85
+
86
+ > Exemple pour MacOS
87
+
88
+ ```bash
89
+ /opt/homebrew/opt/postgresql@17/bin/createuser -s postgres
90
+ /opt/homebrew/opt/postgresql@17/bin/psql -d postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
91
+ /opt/homebrew/opt/postgresql@17/bin/createdb -O postgres projet05
92
+ ```
93
+
94
+ Adaptez les chemins/versions selon votre environnement.
95
+ 3. Renseignez la chaîne de connexion dans `projet_05/settings.yml` :
96
+
97
+ ```yaml
98
+ database:
99
+ url: postgresql+psycopg://user:password@host:5432/projet05
100
+ schema: public
101
+ ```
102
+
103
+ Il est également possible de définir `PROJET05_DATABASE_URL` dans l'environnement.
104
+
105
+ 4. Initialisez la base (création des tables + insertion des CSV d'exemple) avec :
106
+
107
+ ```bash
108
+ python -m scripts.init_db
109
+ ```
110
+
111
+ 5. Assurez-vous que l'utilisateur possède les droits `CREATE/DROP TABLE` dans le schéma ciblé : les tables `sirh`, `evaluation`, `sond` ainsi que `prediction_logs` seront créées ou recréées à chaque ré-exécution.
112
+
113
+ 6. Lancez ensuite `python -m projet_05.dataset` comme auparavant (ou `python main.py` pour exécuter toutes les étapes). La requête SQL utilisée est toujours exportée dans `reports/merge_sql.sql` pour audit.
114
+
115
+ > Les interactions utilisateur/modèle (qu'elles proviennent du formulaire, du tableau ou d'un upload) sont automatiquement journalisées dans la table `prediction_logs`, ce qui permet de tracer les usages et de constituer un dataset réel pour le monitoring.
116
+
117
+ ## Tests & couverture
118
+
119
+ Une batterie de tests Pytest valident l’intégrité de la base PostgreSQL, la fusion des données et la journalisation des prédictions.
120
+
121
+ 1. Démarrez PostgreSQL (cf. section précédente) et créez un utilisateur ayant les droits `CREATE/DROP DATABASE`.
122
+ 2. Facultatif : définissez `PROJET05_TEST_DATABASE_URL` si vous souhaitez utiliser une URL différente de `postgresql+psycopg://postgres:postgres@localhost:5432/projet05_test`.
123
+ 3. Exécutez les tests et générez le rapport de couverture :
124
+
125
+ ```bash
126
+ pytest
127
+ ```
128
+
129
+ La configuration Pytest produit à la fois un rapport terminal (`--cov-report=term-missing`) et un fichier `coverage.xml` exploitable par vos outils CI/CD.
130
+ Les sorties complètes sont sauvegardées dans `logs/tests_logs/<timestamp>.log`.
131
+
132
+ Les tests vérifient notamment :
133
+
134
+ - la création des tables `sirh`, `evaluation`, `sond`, `prediction_logs` et la cohérence du nombre de lignes insérées ;
135
+ - l’intégrité du DataFrame fusionné (typage, absence de valeurs nulles sur la clé primaire, cohérence de la cible) ;
136
+ - la robustesse du script de log des prédictions (insertion d’entrées dans `prediction_logs` et nettoyage) ;
137
+ - la génération des logs de pipeline, regroupés dans `logs/pipeline_logs/<timestamp>.log`.
138
+
139
  --------
140
 
141
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
165
  [![Issues][issues-shield]][issues-url]
166
  [![project_license][license-shield]][license-url]
167
  [![LinkedIn][linkedin-shield]][linkedin-url]
168
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml)
169
 
170
  <!-- PROJECT LOGO -->
171
  <br />
 
398
  [mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
399
  [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
400
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
401
+
402
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml)
403
+ [![https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff]][[mkdocs-url](https://stephmnt.github.io/OCR_Projet05/)]
404
+ ![GitHub Release Date](https://img.shields.io/github/release-date/stephmnt/OCR_Projet05?display_date=published_at&style=flat-square)
405
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -5,15 +5,265 @@ from pathlib import Path
5
  from typing import Any
6
 
7
  import gradio as gr
 
8
  import pandas as pd
9
  from loguru import logger
 
 
10
 
11
  from projet_05.branding import apply_brand_theme
12
  from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
 
 
13
 
14
  MODEL_PATH = Path("models/best_model.joblib")
15
  METADATA_PATH = Path("models/best_model_meta.json")
16
  SCHEMA_PATH = Path("data/processed/schema.json")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def _load_schema(path: Path) -> dict[str, Any]:
@@ -54,6 +304,28 @@ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
54
  return []
55
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
58
  """Normalize any user input into a validated DataFrame.
59
 
@@ -79,6 +351,195 @@ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
79
  return df
80
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def _ensure_model():
83
  """Ensure that a pipeline has been loaded before inference."""
84
  if PIPELINE is None:
@@ -90,46 +551,77 @@ def _ensure_model():
90
  def score_table(table):
91
  """Score data entered via the interactive table."""
92
  _ensure_model()
93
- df = _convert_input(table, FEATURE_ORDER)
 
 
94
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
95
- return run_inference(
96
  df,
97
  PIPELINE,
98
  THRESHOLD,
99
  drop_columns=drop_cols,
100
  required_features=FEATURE_ORDER or None,
101
  )
 
 
102
 
103
 
104
  def score_csv(upload):
 
105
  """Score a CSV uploaded by the user."""
106
  _ensure_model()
107
  if upload is None:
108
  raise gr.Error("Veuillez déposer un fichier CSV.")
109
  df = pd.read_csv(upload.name)
 
 
110
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
111
- return run_inference(
112
  df,
113
  PIPELINE,
114
  THRESHOLD,
115
  drop_columns=drop_cols,
116
  required_features=FEATURE_ORDER or None,
117
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
 
120
  def predict_from_form(*values):
121
  """Score a single row coming from the form tab."""
122
  _ensure_model()
123
- if not FEATURE_ORDER:
124
  raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
125
- payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
126
  df = pd.DataFrame([payload])
 
 
127
  scored = run_inference(
128
  df,
129
  PIPELINE,
130
  THRESHOLD,
131
  required_features=FEATURE_ORDER or None,
132
  )
 
133
  row = scored.iloc[0]
134
  label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
135
  return {
@@ -157,11 +649,37 @@ except FileNotFoundError as exc:
157
  logger.warning("Artéfact manquant: {}", exc)
158
 
159
  FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  with gr.Blocks(title="Prédicteur d'attrition") as demo:
162
- gr.Markdown("# API Gradio – Prédiction de départ employé")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  gr.Markdown(
164
- "Le modèle applique le pipeline entraîné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
165
  )
166
 
167
  if PIPELINE is None:
@@ -172,26 +690,22 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
172
  gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
173
 
174
  with gr.Tab("Formulaire unitaire"):
175
- if not FEATURE_ORDER:
176
  gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
177
  else:
178
  form_inputs: list[gr.components.Component] = [] # type: ignore
179
- for feature in FEATURE_ORDER:
180
- form_inputs.append(
181
- gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
182
- )
183
  form_output = gr.JSON(label="Résultat")
184
  gr.Button("Prédire").click(
185
  fn=predict_from_form,
186
  inputs=form_inputs,
187
  outputs=form_output,
188
  )
189
-
190
- with gr.Tab("Tableau interactif"):
191
  table_input = gr.Dataframe(
192
- headers=FEATURE_ORDER if FEATURE_ORDER else None,
193
  row_count=(1, "dynamic"),
194
- col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
195
  type="pandas",
196
  )
197
  table_output = gr.Dataframe(label="Prédictions", type="pandas")
@@ -201,7 +715,8 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
201
  outputs=table_output,
202
  )
203
 
204
- with gr.Tab("Fichier CSV"):
 
205
  file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
206
  file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
207
  gr.Button("Scorer le fichier").click(
@@ -210,6 +725,21 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
210
  outputs=file_output,
211
  )
212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  if __name__ == "__main__":
215
  demo.launch()
 
5
  from typing import Any
6
 
7
  import gradio as gr
8
+ import numpy as np
9
  import pandas as pd
10
  from loguru import logger
11
+ from sqlalchemy import create_engine
12
+ from sqlalchemy.engine import Engine
13
 
14
  from projet_05.branding import apply_brand_theme
15
  from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
16
+ from projet_05.settings import load_settings
17
+ from projet_05 import dataset as ds
18
 
19
  MODEL_PATH = Path("models/best_model.joblib")
20
  METADATA_PATH = Path("models/best_model_meta.json")
21
  SCHEMA_PATH = Path("data/processed/schema.json")
22
+ DERIVED_FEATURES = {
23
+ "augmentation_par_revenu",
24
+ "annee_sur_poste_par_experience",
25
+ "nb_formation_par_experience",
26
+ "score_moyen_satisfaction",
27
+ "dern_promo_par_experience",
28
+ "evolution_note",
29
+ }
30
+ SATISFACTION_COLUMNS = [
31
+ "satisfaction_employee_environnement",
32
+ "satisfaction_employee_nature_travail",
33
+ "satisfaction_employee_equipe",
34
+ "satisfaction_employee_equilibre_pro_perso",
35
+ ]
36
+ NUMERIC_CODE_COLUMNS = ["niveau_hierarchique_poste", "niveau_education"]
37
+ NUMERIC_FEATURES: set[str] = set()
38
+ CATEGORICAL_FEATURES: set[str] = set()
39
+
40
+ # Configuration manuelle des champs d'entrée (label + placeholder).
41
+ FIELD_UI_CONFIG = [
42
+ {"name": "age", "label": "Âge", "placeholder": "Âge en années (ex : 35)"},
43
+ {"name": "revenu_mensuel", "label": "Revenu mensuel (€)", "placeholder": "Montant mensuel en euros (ex : 4500)"},
44
+ {"name": "annees_dans_l_entreprise", "label": "Années dans l'entreprise", "placeholder": "Ancienneté totale (ex : 4.5)"},
45
+ {"name": "annees_dans_le_poste_actuel", "label": "Années sur le poste actuel", "placeholder": "Durée dans le poste (ex : 2)"},
46
+ {
47
+ "name": "annees_depuis_la_derniere_promotion",
48
+ "label": "Années depuis la dernière promotion",
49
+ "placeholder": "Durée depuis la dernière promotion (ex : 1)",
50
+ },
51
+ {
52
+ "name": "distance_domicile_travail",
53
+ "label": "Distance domicile-travail (km)",
54
+ "placeholder": "Distance en kilomètres (ex : 12)",
55
+ },
56
+ {
57
+ "name": "nombre_participation_pee",
58
+ "label": "Nombre de participations PEE",
59
+ "placeholder": "Nombre de participations (entier)",
60
+ },
61
+ {
62
+ "name": "note_evaluation_actuelle",
63
+ "label": "Note d'évaluation actuelle",
64
+ "placeholder": "Score actuel (1 à 5)",
65
+ },
66
+ {
67
+ "name": "note_evaluation_precedente",
68
+ "label": "Note d'évaluation précédente",
69
+ "placeholder": "Score précédent (1 à 5)",
70
+ },
71
+ {
72
+ "name": "annees_depuis_le_changement_deposte",
73
+ "label": "Années depuis le dernier changement de poste",
74
+ "placeholder": "Temps écoulé (ex : 0 si jamais)",
75
+ },
76
+ {
77
+ "name": "annee_experience_totale",
78
+ "label": "Années d'expérience totale",
79
+ "placeholder": "Expérience cumulative (ex : 8)",
80
+ },
81
+ {
82
+ "name": "nb_formations_suivies",
83
+ "label": "Nombre de formations suivies",
84
+ "placeholder": "Total des formations (entier)",
85
+ },
86
+ {
87
+ "name": "satisfaction_employee_environnement",
88
+ "label": "Satisfaction environnement",
89
+ "placeholder": "Note de 1 (faible) à 5 (forte)",
90
+ "info": "Valeur comprise entre 1 et 5",
91
+ },
92
+ {
93
+ "name": "satisfaction_employee_nature_travail",
94
+ "label": "Satisfaction nature du travail",
95
+ "placeholder": "Note de 1 à 5",
96
+ "info": "Valeur comprise entre 1 et 5",
97
+ },
98
+ {
99
+ "name": "satisfaction_employee_equipe",
100
+ "label": "Satisfaction équipe",
101
+ "placeholder": "Note de 1 à 5",
102
+ "info": "Valeur comprise entre 1 et 5",
103
+ },
104
+ {
105
+ "name": "satisfaction_employee_equilibre_pro_perso",
106
+ "label": "Satisfaction équilibre pro/perso",
107
+ "placeholder": "Note de 1 à 5",
108
+ "info": "Valeur comprise entre 1 et 5",
109
+ },
110
+ {
111
+ "name": "genre",
112
+ "label": "Genre",
113
+ "component": "dropdown",
114
+ "choices": ["Femme", "Homme"],
115
+ "info": "Sélectionnez le genre",
116
+ },
117
+ {
118
+ "name": "departement",
119
+ "label": "Département",
120
+ "component": "dropdown",
121
+ "choices": ["Commercial", "Consulting", "Ressources Humaines"],
122
+ },
123
+ {
124
+ "name": "frequence_deplacement",
125
+ "label": "Fréquence des déplacements",
126
+ "component": "dropdown",
127
+ "choices": ["Aucun", "Occasionnel", "Frequent"],
128
+ },
129
+ {
130
+ "name": "statut_marital",
131
+ "label": "Statut marital",
132
+ "component": "dropdown",
133
+ "choices": ["Célibataire", "Marié(e)", "Divorcé(e)"],
134
+ },
135
+ {
136
+ "name": "poste",
137
+ "label": "Poste occupé",
138
+ "component": "dropdown",
139
+ "choices": [
140
+ "Cadre Commercial",
141
+ "Assistant de Direction",
142
+ "Consultant",
143
+ "Tech Lead",
144
+ "Manager",
145
+ "Senior Manager",
146
+ "Représentant Commercial",
147
+ "Directeur Technique",
148
+ "Ressources Humaines",
149
+ ],
150
+ },
151
+ {
152
+ "name": "niveau_hierarchique_poste",
153
+ "label": "Niveau hiérarchique",
154
+ "component": "dropdown",
155
+ "choices": [
156
+ "1, junior",
157
+ "2",
158
+ "3",
159
+ "4",
160
+ "5, senior",
161
+ ],
162
+ "info": "Valeur numérique issue du SIRH (1 à 5)",
163
+ },
164
+ {
165
+ "name": "niveau_education",
166
+ "label": "Niveau d'études",
167
+ "component": "dropdown",
168
+ "choices": [
169
+ "1, licence",
170
+ "2",
171
+ "3",
172
+ "4",
173
+ "5, master",
174
+ ],
175
+ "info": "Indice numérique (1 à 5) figurant dans les exports bruts",
176
+ },
177
+ {
178
+ "name": "domaine_etude",
179
+ "label": "Domaine d'étude",
180
+ "component": "dropdown",
181
+ "choices": ["Entrepreunariat", "Infra & Cloud", "Marketing", "Ressources Humaines", "Transformation Digitale"],
182
+ },
183
+ {
184
+ "name": "heure_supplementaires",
185
+ "label": "Heures supplémentaires",
186
+ "component": "dropdown",
187
+ "choices": ["Oui", "Non"],
188
+ },
189
+ ]
190
+ FIELD_UI_LOOKUP = {cfg["name"]: cfg for cfg in FIELD_UI_CONFIG}
191
+ try:
192
+ SETTINGS = load_settings()
193
+ except Exception: # pragma: no cover - remains optional when config absent
194
+ SETTINGS = None
195
+ CACHED_ENGINE: Engine | None = None
196
+ CATEGORICAL_NORMALIZERS: dict[str, dict[str, str]] = {
197
+ "genre": {
198
+ "f": "F",
199
+ "femme": "F",
200
+ "m": "M",
201
+ "homme": "M",
202
+ },
203
+ "statut_marital": {
204
+ "célibataire": "Célibataire",
205
+ "celibataire": "Célibataire",
206
+ "marié(e)": "Marié(e)",
207
+ "marie(e)": "Marié(e)",
208
+ "marie": "Marié(e)",
209
+ "marié": "Marié(e)",
210
+ "divorcé(e)": "Divorcé(e)",
211
+ "divorce(e)": "Divorcé(e)",
212
+ },
213
+ "departement": {
214
+ "commercial": "Commercial",
215
+ "consulting": "Consulting",
216
+ "ressources humaines": "Ressources Humaines",
217
+ },
218
+ "poste": {
219
+ "cadre commercial": "Cadre Commercial",
220
+ "assistant de direction": "Assistant de Direction",
221
+ "consultant": "Consultant",
222
+ "tech lead": "Tech Lead",
223
+ "manager": "Manager",
224
+ "senior manager": "Senior Manager",
225
+ "représentant commercial": "Représentant Commercial",
226
+ "representant commercial": "Représentant Commercial",
227
+ "directeur technique": "Directeur Technique",
228
+ "ressources humaines": "Ressources Humaines",
229
+ },
230
+ "frequence_deplacement": {
231
+ "aucun": "Aucun",
232
+ "aucune": "Aucun",
233
+ "occasionnel": "Occasionnel",
234
+ "occasionnelle": "Occasionnel",
235
+ "frequent": "Frequent",
236
+ "fréquent": "Frequent",
237
+ },
238
+ "domaine_etude": {
239
+ "entrepreunariat": "Entrepreunariat",
240
+ "infra & cloud": "Infra & Cloud",
241
+ "infra et cloud": "Infra & Cloud",
242
+ "marketing": "Marketing",
243
+ "ressources humaines": "Ressources Humaines",
244
+ "transformation digitale": "Transformation Digitale",
245
+ },
246
+ "heure_supplementaires": {
247
+ "oui": "Oui",
248
+ "o": "Oui",
249
+ "y": "Oui",
250
+ "non": "Non",
251
+ "n": "Non",
252
+ },
253
+ "niveau_hierarchique_poste": {
254
+ "junior": "Junior",
255
+ "confirmé": "Confirmé",
256
+ "confirme": "Confirmé",
257
+ "direction": "Direction",
258
+ "senior": "Senior",
259
+ },
260
+ "niveau_education": {
261
+ "licence": "Licence",
262
+ "master": "Master",
263
+ "doctorat": "Doctorat",
264
+ "bts": "BTS",
265
+ },
266
+ }
267
 
268
 
269
  def _load_schema(path: Path) -> dict[str, Any]:
 
304
  return []
305
 
306
 
307
+ def _ensure_settings():
308
+ """Ensure configuration settings are available for data fusion."""
309
+
310
+ if SETTINGS is None:
311
+ raise gr.Error(
312
+ "Configuration introuvable. Placez `projet_05/settings.yml` dans le dépôt ou renseignez PROJET05_SETTINGS."
313
+ )
314
+ return SETTINGS
315
+
316
+
317
+ def _get_db_engine(settings: Settings) -> Engine: # pyright: ignore[reportUndefinedVariable]
318
+ global CACHED_ENGINE
319
+ if CACHED_ENGINE is not None:
320
+ return CACHED_ENGINE
321
+ if not settings.db_url:
322
+ raise RuntimeError(
323
+ "Aucune URL de base de données n'a été fournie. Configurez `database.url` dans settings.yml."
324
+ )
325
+ CACHED_ENGINE = create_engine(settings.db_url, future=True)
326
+ return CACHED_ENGINE
327
+
328
+
329
  def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
330
  """Normalize any user input into a validated DataFrame.
331
 
 
351
  return df
352
 
353
 
354
+ def _read_uploaded_csv(upload, label: str) -> pd.DataFrame:
355
+ """Load an uploaded CSV file or raise a user-friendly error."""
356
+
357
+ if upload is None:
358
+ raise gr.Error(f"Veuillez déposer le fichier {label}.")
359
+ try:
360
+ return pd.read_csv(upload.name)
361
+ except Exception as exc: # pragma: no cover - delegated to pandas
362
+ raise gr.Error(f"Impossible de lire le fichier {label}: {exc}") from exc
363
+
364
+
365
+ def _resolve_field_ui(feature: str) -> tuple[str, str, str | None, str, dict[str, Any]]:
366
+ """Return UI metadata (label, placeholder, info, component type, config)."""
367
+
368
+ config = FIELD_UI_LOOKUP.get(feature, {})
369
+ label = config.get("label") or feature.replace("_", " ").capitalize()
370
+ placeholder = config.get("placeholder") or f"Saisir {label.lower()}"
371
+ info = config.get("info")
372
+ component = config.get("component", "textbox")
373
+ return label, placeholder, info, component, config
374
+
375
+
376
+ def _build_input_component(feature: str) -> gr.components.Component: # type: ignore
377
+ """Instantiate the appropriate Gradio component for a feature."""
378
+
379
+ label, placeholder, info, component, config = _resolve_field_ui(feature)
380
+ if component == "dropdown":
381
+ choices = config.get("choices") or []
382
+ default = config.get("default")
383
+ allow_custom = config.get("allow_custom_value", False)
384
+ return gr.Dropdown(
385
+ label=label,
386
+ choices=choices,
387
+ value=default,
388
+ info=info,
389
+ allow_custom_value=allow_custom,
390
+ )
391
+ return gr.Textbox(label=label, placeholder=placeholder, info=info)
392
+
393
+
394
+ def _normalize_categorical_values(df: pd.DataFrame) -> pd.DataFrame:
395
+ """Normalize friendly categorical values into the codes used by the model."""
396
+
397
+ normalized = df.copy()
398
+
399
+ def _normalize_value(value, mapping: dict[str, str]):
400
+ if pd.isna(value):
401
+ return value
402
+ if isinstance(value, str):
403
+ cleaned = value.strip()
404
+ lowered = cleaned.lower()
405
+ return mapping.get(lowered, cleaned)
406
+ return mapping.get(value, value)
407
+
408
+ for column, mapping in CATEGORICAL_NORMALIZERS.items():
409
+ if column not in normalized.columns:
410
+ continue
411
+ normalized[column] = normalized[column].apply(lambda v, m=mapping: _normalize_value(v, m))
412
+ for column in NUMERIC_CODE_COLUMNS:
413
+ if column in normalized.columns:
414
+ extracted = (
415
+ normalized[column]
416
+ .astype(str)
417
+ .str.extract(r"(-?\d+(?:[.,]\d+)?)")[0]
418
+ .str.replace(",", ".", regex=False)
419
+ )
420
+ normalized[column] = pd.to_numeric(extracted, errors="coerce")
421
+ numeric_targets = [col for col in NUMERIC_FEATURES.union(DERIVED_FEATURES).union(NUMERIC_CODE_COLUMNS) if col in normalized.columns]
422
+ for column in numeric_targets:
423
+ normalized[column] = pd.to_numeric(normalized[column], errors="coerce")
424
+ return normalized
425
+
426
+
427
+ def _apply_derived_features(df: pd.DataFrame) -> pd.DataFrame:
428
+ """Recompute engineered ratios so end-users do not have to provide them."""
429
+
430
+ enriched = _normalize_categorical_values(df)
431
+
432
+ def _safe_ratio(numerator: str, denominator: str, output: str) -> None:
433
+ if numerator not in enriched.columns or denominator not in enriched.columns:
434
+ return
435
+ numerator_series = pd.to_numeric(enriched[numerator], errors="coerce")
436
+ denominator_series = pd.to_numeric(enriched[denominator], errors="coerce").replace(0, pd.NA)
437
+ enriched[output] = numerator_series / denominator_series
438
+
439
+ prev_raise_col = "augementation_salaire_precedente"
440
+ if prev_raise_col in enriched:
441
+ normalized = (
442
+ enriched[prev_raise_col]
443
+ .astype(str)
444
+ .str.replace("%", "", regex=False)
445
+ .str.replace(",", ".", regex=False)
446
+ .str.strip()
447
+ )
448
+ enriched[prev_raise_col] = pd.to_numeric(normalized, errors="coerce") / 100
449
+
450
+ _safe_ratio("augementation_salaire_precedente", "revenu_mensuel", "augmentation_par_revenu")
451
+ _safe_ratio("annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
452
+ _safe_ratio("nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
453
+ _safe_ratio("annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience")
454
+
455
+ existing_sats = [col for col in SATISFACTION_COLUMNS if col in enriched.columns]
456
+ if existing_sats:
457
+ enriched["score_moyen_satisfaction"] = pd.DataFrame(
458
+ {col: pd.to_numeric(enriched[col], errors="coerce") for col in existing_sats}
459
+ ).mean(axis=1)
460
+
461
+ if {"note_evaluation_actuelle", "note_evaluation_precedente"}.issubset(enriched.columns):
462
+ enriched["evolution_note"] = pd.to_numeric(
463
+ enriched["note_evaluation_actuelle"], errors="coerce"
464
+ ) - pd.to_numeric(enriched["note_evaluation_precedente"], errors="coerce")
465
+
466
+ return enriched.replace({pd.NA: np.nan})
467
+
468
+
469
+ def _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload) -> pd.DataFrame:
470
+ """Merge raw SIRH / evaluation / sondage CSVs uploaded by the user."""
471
+
472
+ settings = _ensure_settings()
473
+ sirh = ds.clean_text_values(_read_uploaded_csv(sirh_upload, "SIRH")).pipe(
474
+ ds._harmonize_id_column, settings.col_id, digits_only=True
475
+ )
476
+ evaluation = (
477
+ ds.clean_text_values(_read_uploaded_csv(evaluation_upload, "évaluation"))
478
+ .pipe(ds._rename_column, "eval_number", settings.col_id)
479
+ .pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)
480
+ )
481
+ sond = (
482
+ ds.clean_text_values(_read_uploaded_csv(sond_upload, "sondage"))
483
+ .pipe(ds._rename_column, "code_sondage", settings.col_id)
484
+ .pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)
485
+ )
486
+
487
+ for label, frame in {"SIRH": sirh, "évaluation": evaluation, "sondage": sond}.items():
488
+ if frame.empty:
489
+ raise gr.Error(f"Le fichier {label} est vide ou invalide.")
490
+ if settings.col_id not in frame.columns:
491
+ raise gr.Error(f"La colonne {settings.col_id} est absente du fichier {label}.")
492
+
493
+ merged = sirh.merge(evaluation, on=settings.col_id, how="inner").merge(sond, on=settings.col_id, how="inner")
494
+ if merged.empty:
495
+ raise gr.Error("Aucune ligne résultante après fusion des trois fichiers (jointure INNER vide).")
496
+ return merged
497
+
498
+
499
+ def _log_predictions(source: str, raw_inputs: pd.DataFrame, scored: pd.DataFrame) -> None:
500
+ """Persist user interactions with the ML model into PostgreSQL."""
501
+
502
+ if SETTINGS is None or not SETTINGS.db_url:
503
+ return
504
+ settings = _ensure_settings()
505
+ try:
506
+ engine = _get_db_engine(settings)
507
+ except Exception as exc: # pragma: no cover - logging best effort
508
+ logger.error("Connexion impossible pour logger les interactions: {}", exc)
509
+ return
510
+
511
+ payload = raw_inputs.reindex(scored.index).fillna(value=pd.NA)
512
+ col_id = settings.col_id
513
+ records = []
514
+ for idx, row in scored.iterrows():
515
+ original = payload.loc[idx].to_dict() if idx in payload.index else {} # type: ignore
516
+ records.append(
517
+ {
518
+ "id_employee": row.get(col_id),
519
+ "probability": float(row.get("proba_depart", 0.0)),
520
+ "decision": int(row.get("prediction", 0)),
521
+ "threshold": THRESHOLD,
522
+ "source": source,
523
+ "payload": json.dumps(original, ensure_ascii=False, default=str),
524
+ }
525
+ )
526
+
527
+ if not records:
528
+ return
529
+
530
+ try:
531
+ pd.DataFrame(records).to_sql(
532
+ "prediction_logs",
533
+ engine,
534
+ schema=settings.db_schema,
535
+ if_exists="append",
536
+ index=False,
537
+ method="multi",
538
+ )
539
+ except Exception as exc: # pragma: no cover - logging best effort
540
+ logger.error("Impossible de journaliser les interactions: {}", exc)
541
+
542
+
543
  def _ensure_model():
544
  """Ensure that a pipeline has been loaded before inference."""
545
  if PIPELINE is None:
 
551
  def score_table(table):
552
  """Score data entered via the interactive table."""
553
  _ensure_model()
554
+ df = _convert_input(table, INPUT_FEATURES)
555
+ original = df.copy()
556
+ df = _apply_derived_features(df)
557
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
558
+ scored = run_inference(
559
  df,
560
  PIPELINE,
561
  THRESHOLD,
562
  drop_columns=drop_cols,
563
  required_features=FEATURE_ORDER or None,
564
  )
565
+ _log_predictions("interactive_table", original, scored)
566
+ return scored
567
 
568
 
569
  def score_csv(upload):
570
+
571
  """Score a CSV uploaded by the user."""
572
  _ensure_model()
573
  if upload is None:
574
  raise gr.Error("Veuillez déposer un fichier CSV.")
575
  df = pd.read_csv(upload.name)
576
+ original = df.copy()
577
+ df = _apply_derived_features(df)
578
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
579
+ scored = run_inference(
580
  df,
581
  PIPELINE,
582
  THRESHOLD,
583
  drop_columns=drop_cols,
584
  required_features=FEATURE_ORDER or None,
585
  )
586
+ _log_predictions("csv_file", original, scored)
587
+ return scored
588
+
589
+
590
+ def score_raw_files(sirh_upload, evaluation_upload, sond_upload):
591
+ """Score three raw CSVs (SIRH, évaluation, sondage) after merging them."""
592
+
593
+ _ensure_model()
594
+ merged = _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload)
595
+ original = merged.copy()
596
+ df = _apply_derived_features(merged)
597
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
598
+ scored = run_inference(
599
+ df,
600
+ PIPELINE,
601
+ THRESHOLD,
602
+ drop_columns=drop_cols,
603
+ required_features=FEATURE_ORDER or None,
604
+ )
605
+ _log_predictions("raw_files", original, scored)
606
+ return scored
607
 
608
 
609
  def predict_from_form(*values):
610
  """Score a single row coming from the form tab."""
611
  _ensure_model()
612
+ if not INPUT_FEATURES:
613
  raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
614
+ payload = {feature: value for feature, value in zip(INPUT_FEATURES, values)}
615
  df = pd.DataFrame([payload])
616
+ original = df.copy()
617
+ df = _apply_derived_features(df)
618
  scored = run_inference(
619
  df,
620
  PIPELINE,
621
  THRESHOLD,
622
  required_features=FEATURE_ORDER or None,
623
  )
624
+ _log_predictions("form", original, scored)
625
  row = scored.iloc[0]
626
  label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
627
  return {
 
649
  logger.warning("Artéfact manquant: {}", exc)
650
 
651
  FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
652
+ INPUT_FEATURES = [feature for feature in FEATURE_ORDER if feature not in DERIVED_FEATURES]
653
+ if not INPUT_FEATURES:
654
+ INPUT_FEATURES = FEATURE_ORDER
655
+ numeric_from_schema = set(SCHEMA.get("numerical_features", []))
656
+ categorical_from_schema = set(SCHEMA.get("categorical_features", []))
657
+ if not numeric_from_schema:
658
+ numeric_from_schema = set((METADATA.get("features", {}).get("numerical") or []))
659
+ if not categorical_from_schema:
660
+ categorical_from_schema = set((METADATA.get("features", {}).get("categorical") or []))
661
+ NUMERIC_FEATURES = numeric_from_schema
662
+ CATEGORICAL_FEATURES = categorical_from_schema
663
 
664
  with gr.Blocks(title="Prédicteur d'attrition") as demo:
665
+ gr.Markdown("# OCR Projet 5 – Prédiction de départ employé")
666
+ gr.HTML(
667
+ """
668
+ <div style="display:flex; gap:0.5rem; flex-wrap:wrap;">
669
+ <a href="https://github.com/stephmnt/OCR_Projet05/releases" target="_blank" rel="noreferrer">
670
+ <img src="https://img.shields.io/github/v/release/stephmnt/OCR_Projet05" alt="GitHub Release" />
671
+ </a>
672
+ <a href="https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml" target="_blank" rel="noreferrer">
673
+ <img src="https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml" alt="GitHub Actions Workflow Status" />
674
+ </a>
675
+ <a href="https://stephmnt.github.io/OCR_Projet05" target="_blank" rel="noreferrer">
676
+ <img src="https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff" alt="MkDocs" />
677
+ </a>
678
+ </div>
679
+ """
680
+ )
681
  gr.Markdown(
682
+ "Le modèle fournit une probabilité de départ ainsi qu'une décision binaire."
683
  )
684
 
685
  if PIPELINE is None:
 
690
  gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
691
 
692
  with gr.Tab("Formulaire unitaire"):
693
+ if not INPUT_FEATURES:
694
  gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
695
  else:
696
  form_inputs: list[gr.components.Component] = [] # type: ignore
697
+ for feature in INPUT_FEATURES:
698
+ form_inputs.append(_build_input_component(feature))
 
 
699
  form_output = gr.JSON(label="Résultat")
700
  gr.Button("Prédire").click(
701
  fn=predict_from_form,
702
  inputs=form_inputs,
703
  outputs=form_output,
704
  )
 
 
705
  table_input = gr.Dataframe(
706
+ headers=INPUT_FEATURES if INPUT_FEATURES else None,
707
  row_count=(1, "dynamic"),
708
+ col_count=(len(INPUT_FEATURES), "dynamic") if INPUT_FEATURES else (5, "dynamic"),
709
  type="pandas",
710
  )
711
  table_output = gr.Dataframe(label="Prédictions", type="pandas")
 
715
  outputs=table_output,
716
  )
717
 
718
+ with gr.Tab("Fichier CSV fusionné"):
719
+ gr.Markdown("Un exemple de fichier à importer est disponible dans le dépôt github : [`references/sample_employees.csv`](https://github.com/stephmnt/OCR_Projet05/blob/main/references/sample_employees.csv)")
720
  file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
721
  file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
722
  gr.Button("Scorer le fichier").click(
 
725
  outputs=file_output,
726
  )
727
 
728
+ with gr.Tab("Fichiers non-mergés"):
729
+ gr.Markdown(
730
+ "Téléversez directement les trois fichiers bruts (SIRH, évaluation, sondage). "
731
+ "L'application reproduira automatiquement la fusion puis le scoring."
732
+ )
733
+ sirh_input = gr.File(file_types=[".csv"], label="Fichier SIRH")
734
+ evaluation_input = gr.File(file_types=[".csv"], label="Fichier Évaluation")
735
+ sond_input = gr.File(file_types=[".csv"], label="Fichier Sondage")
736
+ raw_output = gr.Dataframe(label="Résultats fusion automatique", type="pandas")
737
+ gr.Button("Fusionner et scorer").click(
738
+ fn=score_raw_files,
739
+ inputs=[sirh_input, evaluation_input, sond_input],
740
+ outputs=raw_output,
741
+ )
742
+
743
 
744
  if __name__ == "__main__":
745
  demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -1,4 +1,4 @@
1
- name: Deploy to Hugging Face Spaces
2
 
3
  on:
4
  push:
@@ -19,12 +19,16 @@ jobs:
19
  - name: Setup Python
20
  uses: actions/setup-python@v5
21
  with:
22
- python-version: "3.10"
23
 
24
  - name: Install dependencies
25
  run: |
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
 
 
 
 
28
 
29
  - name: Deploy to Hugging Face Space
30
  env:
@@ -33,7 +37,7 @@ jobs:
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
- rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
 
1
+ name: Deploiement vers Hugging Face Spaces
2
 
3
  on:
4
  push:
 
19
  - name: Setup Python
20
  uses: actions/setup-python@v5
21
  with:
22
+ python-version: "3.11"
23
 
24
  - name: Install dependencies
25
  run: |
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28
+ pip install -e .
29
+
30
+ - name: Préparer les données et le modèle
31
+ run: python main.py
32
 
33
  - name: Deploy to Hugging Face Space
34
  env:
 
37
  git config --global user.email "actions@github.com"
38
  git config --global user.name "GitHub Actions"
39
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
40
+ rsync -av --exclude '.git' --exclude 'docs' ./ hf_space/
41
  cd hf_space
42
  git add .
43
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/static.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploiement de la documentation
2
+
3
+ on:
4
+ push:
5
+ branches: ["main"]
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+ pages: write
11
+ id-token: write
12
+
13
+ concurrency:
14
+ group: "pages"
15
+ cancel-in-progress: false
16
+
17
+ jobs:
18
+ deploy:
19
+ environment:
20
+ name: github-pages
21
+ url: ${{ steps.deployment.outputs.page_url }}
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - name: Checkout
25
+ uses: actions/checkout@v4
26
+
27
+ - name: Setup Pages
28
+ uses: actions/configure-pages@v5
29
+
30
+ - name: Upload artifact
31
+ uses: actions/upload-pages-artifact@v3
32
+ with:
33
+ path: 'docs/site'
34
+
35
+ - name: Deploy to GitHub Pages
36
+ id: deployment
37
+ uses: actions/deploy-pages@v4
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore CHANGED
@@ -1,19 +1,18 @@
1
- # Data
2
- /data/
3
-
4
- # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
7
  *.pdf
8
  /output/
9
  questions.md
10
- *.pdf
11
-
 
12
 
13
  # vim
14
  *.swp
15
  *.swo
16
 
 
17
  ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
18
 
19
  # Byte-compiled / optimized / DLL files
@@ -86,9 +85,6 @@ instance/
86
  # Scrapy stuff:
87
  .scrapy
88
 
89
- # MkDocs documentation
90
- docs/site/
91
-
92
  # PyBuilder
93
  .pybuilder/
94
  target/
@@ -166,9 +162,6 @@ venv.bak/
166
  # Rope project settings
167
  .ropeproject
168
 
169
- # mkdocs documentation
170
- /site
171
-
172
  # mypy
173
  .mypy_cache/
174
  .dmypy.json
 
1
+ # Spécifique à ce projet
 
 
 
2
  .DS_Store
3
  *.code-workspace
4
  *.pdf
5
  /output/
6
  questions.md
7
+ /reports/
8
+ /data/
9
+ runtime.txt
10
 
11
  # vim
12
  *.swp
13
  *.swo
14
 
15
+
16
  ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
17
 
18
  # Byte-compiled / optimized / DLL files
 
85
  # Scrapy stuff:
86
  .scrapy
87
 
 
 
 
88
  # PyBuilder
89
  .pybuilder/
90
  target/
 
162
  # Rope project settings
163
  .ropeproject
164
 
 
 
 
165
  # mypy
166
  .mypy_cache/
167
  .dmypy.json
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -8,15 +8,17 @@ sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: true
10
  short_description: Projet 05 formation Openclassrooms
 
11
  ---
12
 
13
- # projet_05
14
 
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
18
 
19
- Déployez un modèle de Machine Learning
 
20
 
21
  ## Organisation du projet
22
 
@@ -305,10 +307,6 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
305
  [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
306
  [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
307
  [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
308
- [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
309
- [license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
310
- [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
311
- [linkedin-url]: https://linkedin.com/in/stephanemanet
312
  [product-screenshot]: images/screenshot.png
313
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
314
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
@@ -328,9 +326,15 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
328
  [Bootstrap-url]: https://getbootstrap.com
329
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
330
  [JQuery-url]: https://jquery.com
 
 
 
 
 
331
  <!-- TODO: -->
332
- [Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
333
- [Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
334
- [MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
 
335
  [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
336
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
 
8
  app_file: app.py
9
  pinned: true
10
  short_description: Projet 05 formation Openclassrooms
11
+ python_version: 3.11
12
  ---
13
 
14
+ # projet_05 : Déployez un modèle de Machine Learning
15
 
16
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
17
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
18
  </a>
19
 
20
+ [![mkdocs-shield]][mkdocs-url]
21
+
22
 
23
  ## Organisation du projet
24
 
 
307
  [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
308
  [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
309
  [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
 
 
 
 
310
  [product-screenshot]: images/screenshot.png
311
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
312
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
 
326
  [Bootstrap-url]: https://getbootstrap.com
327
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
328
  [JQuery-url]: https://jquery.com
329
+ <!-- OK -->
330
+ [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
331
+ [license-url]: https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE
332
+ [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
333
+ [linkedin-url]: https://linkedin.com/in/stephanemanet
334
  <!-- TODO: -->
335
+ [postgres-shield]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
336
+ [python-shield]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
337
+ [mkdocs-shield]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
338
+ [mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
339
  [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
340
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -17,12 +17,30 @@ SCHEMA_PATH = Path("data/processed/schema.json")
17
 
18
 
19
  def _load_schema(path: Path) -> dict[str, Any]:
 
 
 
 
 
 
 
 
20
  if not path.exists():
21
  return {}
22
  return json.loads(path.read_text(encoding="utf-8"))
23
 
24
 
25
  def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
 
 
 
 
 
 
 
 
 
 
26
  if schema:
27
  candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
28
  if candidates:
@@ -37,6 +55,18 @@ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
37
 
38
 
39
  def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
40
  if isinstance(payload, pd.DataFrame):
41
  df = payload.copy()
42
  elif payload is None:
@@ -50,6 +80,7 @@ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
50
 
51
 
52
  def _ensure_model():
 
53
  if PIPELINE is None:
54
  raise gr.Error(
55
  "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
@@ -57,6 +88,7 @@ def _ensure_model():
57
 
58
 
59
  def score_table(table):
 
60
  _ensure_model()
61
  df = _convert_input(table, FEATURE_ORDER)
62
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
@@ -70,6 +102,7 @@ def score_table(table):
70
 
71
 
72
  def score_csv(upload):
 
73
  _ensure_model()
74
  if upload is None:
75
  raise gr.Error("Veuillez déposer un fichier CSV.")
@@ -85,6 +118,7 @@ def score_csv(upload):
85
 
86
 
87
  def predict_from_form(*values):
 
88
  _ensure_model()
89
  if not FEATURE_ORDER:
90
  raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
@@ -132,7 +166,7 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
132
 
133
  if PIPELINE is None:
134
  gr.Markdown(
135
- "⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
136
  )
137
  else:
138
  gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
 
17
 
18
 
19
  def _load_schema(path: Path) -> dict[str, Any]:
20
+ """Load the schema definition stored as JSON.
21
+
22
+ Args:
23
+ path: Path to the schema.json file.
24
+
25
+ Returns:
26
+ A dictionary describing the schema or an empty dict if the file is missing.
27
+ """
28
  if not path.exists():
29
  return {}
30
  return json.loads(path.read_text(encoding="utf-8"))
31
 
32
 
33
  def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
34
+ """Infer the ordered list of features expected by the model.
35
+
36
+ Args:
37
+ metadata: Metadata produced during training.
38
+ schema: Schema derived from `features.py`.
39
+ pipeline: Loaded sklearn pipeline (optional).
40
+
41
+ Returns:
42
+ List of feature names in the order expected by the model.
43
+ """
44
  if schema:
45
  candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
46
  if candidates:
 
55
 
56
 
57
  def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
58
+ """Normalize any user input into a validated DataFrame.
59
+
60
+ Args:
61
+ payload: Raw table coming from Gradio (DataFrame, list, etc.).
62
+ headers: Expected column names.
63
+
64
+ Returns:
65
+ A sanitized DataFrame.
66
+
67
+ Raises:
68
+ gr.Error: If no valid row is provided.
69
+ """
70
  if isinstance(payload, pd.DataFrame):
71
  df = payload.copy()
72
  elif payload is None:
 
80
 
81
 
82
  def _ensure_model():
83
+ """Ensure that a pipeline has been loaded before inference."""
84
  if PIPELINE is None:
85
  raise gr.Error(
86
  "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
 
88
 
89
 
90
  def score_table(table):
91
+ """Score data entered via the interactive table."""
92
  _ensure_model()
93
  df = _convert_input(table, FEATURE_ORDER)
94
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
 
102
 
103
 
104
  def score_csv(upload):
105
+ """Score a CSV uploaded by the user."""
106
  _ensure_model()
107
  if upload is None:
108
  raise gr.Error("Veuillez déposer un fichier CSV.")
 
118
 
119
 
120
  def predict_from_form(*values):
121
+ """Score a single row coming from the form tab."""
122
  _ensure_model()
123
  if not FEATURE_ORDER:
124
  raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
 
166
 
167
  if PIPELINE is None:
168
  gr.Markdown(
169
+ "**Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
170
  )
171
  else:
172
  gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore CHANGED
@@ -4,6 +4,11 @@
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
 
 
 
 
 
7
 
8
  # vim
9
  *.swp
 
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
7
+ *.pdf
8
+ /output/
9
+ questions.md
10
+ *.pdf
11
+
12
 
13
  # vim
14
  *.swp
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,5 +1,3 @@
1
- # projet_05
2
-
3
  ---
4
  title: OCR_Projet05
5
  emoji: 🔥
@@ -12,6 +10,8 @@ pinned: true
12
  short_description: Projet 05 formation Openclassrooms
13
  ---
14
 
 
 
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
@@ -76,17 +76,6 @@ Déployez un modèle de Machine Learning
76
 
77
  --------
78
 
79
- ---
80
- title: Projet 05
81
- emoji: 👀
82
- colorFrom: indigo
83
- colorTo: green
84
- sdk: gradio
85
- sdk_version: 5.49.1
86
- app_file: app.py
87
- pinned: false
88
- ---
89
-
90
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
91
 
92
  <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
@@ -99,8 +88,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
99
  *** Thanks again! Now go create something AMAZING! :D
100
  -->
101
 
102
-
103
-
104
  <!-- PROJECT SHIELDS -->
105
  <!--
106
  *** I'm using markdown "reference style" links for readability.
@@ -118,8 +105,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
118
  [![LinkedIn][linkedin-shield]][linkedin-url]
119
  ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
120
 
121
-
122
-
123
  <!-- PROJECT LOGO -->
124
  <br />
125
  <div align="center">
@@ -143,8 +128,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
143
  </p>
144
  </div>
145
 
146
-
147
-
148
  <!-- TABLE OF CONTENTS -->
149
  <details>
150
  <summary>Table of Contents</summary>
@@ -191,8 +174,6 @@ Here's a blank template to get started. To avoid retyping too much info, do a se
191
 
192
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
193
 
194
-
195
-
196
  <!-- GETTING STARTED -->
197
  ## Getting Started
198
 
@@ -212,20 +193,19 @@ This is an example of how to list things you need to use the software and how to
212
  pip install -r requirements.txt
213
  uvicorn app.main:app --reload
214
 
215
- 1. Get a free API Key at [https://example.com](https://example.com)
216
- 2. Clone the repo
217
  ```sh
218
- git clone https://github.com/github_username/repo_name.git
219
  ```
220
- 3. Install NPM packages
221
  ```sh
222
  npm install
223
  ```
224
- 4. Enter your API in `config.js`
225
  ```js
226
  const API_KEY = 'ENTER YOUR API';
227
  ```
228
- 5. Change git remote url to avoid accidental pushes to base project
229
  ```sh
230
  git remote set-url origin github_username/repo_name
231
  git remote -v # confirm the changes
 
 
 
1
  ---
2
  title: OCR_Projet05
3
  emoji: 🔥
 
10
  short_description: Projet 05 formation Openclassrooms
11
  ---
12
 
13
+ # projet_05
14
+
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
 
76
 
77
  --------
78
 
 
 
 
 
 
 
 
 
 
 
 
79
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
80
 
81
  <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
 
88
  *** Thanks again! Now go create something AMAZING! :D
89
  -->
90
 
 
 
91
  <!-- PROJECT SHIELDS -->
92
  <!--
93
  *** I'm using markdown "reference style" links for readability.
 
105
  [![LinkedIn][linkedin-shield]][linkedin-url]
106
  ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
107
 
 
 
108
  <!-- PROJECT LOGO -->
109
  <br />
110
  <div align="center">
 
128
  </p>
129
  </div>
130
 
 
 
131
  <!-- TABLE OF CONTENTS -->
132
  <details>
133
  <summary>Table of Contents</summary>
 
174
 
175
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
176
 
 
 
177
  <!-- GETTING STARTED -->
178
  ## Getting Started
179
 
 
193
  pip install -r requirements.txt
194
  uvicorn app.main:app --reload
195
 
196
+ 1. Clone the repo
 
197
  ```sh
198
+ git clone https://github.com/stephmnt/OCR_Projet05.git
199
  ```
200
+ 2. Install NPM packages
201
  ```sh
202
  npm install
203
  ```
204
+ 3. Enter your API in `config.js`
205
  ```js
206
  const API_KEY = 'ENTER YOUR API';
207
  ```
208
+ 4. Change git remote url to avoid accidental pushes to base project
209
  ```sh
210
  git remote set-url origin github_username/repo_name
211
  git remote -v # confirm the changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -33,8 +33,8 @@ jobs:
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
- rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
+ rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,5 +1,17 @@
1
  # projet_05
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
  </a>
@@ -57,6 +69,11 @@ Déployez un modèle de Machine Learning
57
  └── plots.py <- Code to create visualizations
58
  ```
59
 
 
 
 
 
 
60
  --------
61
 
62
  ---
@@ -93,6 +110,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
93
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
  -->
95
  [![Contributors][contributors-shield]][contributors-url]
 
96
  [![Forks][forks-shield]][forks-url]
97
  [![Stargazers][stars-shield]][stars-url]
98
  [![Issues][issues-shield]][issues-url]
@@ -236,7 +254,7 @@ _For more examples, please refer to the [Documentation](https://example.com)_
236
  - [ ] Feature 3
237
  - [ ] Nested Feature
238
 
239
- See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
 
241
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
 
@@ -299,18 +317,18 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
299
 
300
  <!-- MARKDOWN LINKS & IMAGES -->
301
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
- [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
- [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
- [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
- [forks-url]: https://github.com/github_username/repo_name/network/members
306
- [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
- [stars-url]: https://github.com/github_username/repo_name/stargazers
308
- [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
- [issues-url]: https://github.com/github_username/repo_name/issues
310
- [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
- [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
- [linkedin-url]: https://linkedin.com/in/linkedin_username
314
  [product-screenshot]: images/screenshot.png
315
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
@@ -331,10 +349,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
331
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
  [JQuery-url]: https://jquery.com
333
  <!-- TODO: -->
334
- [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
- [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
- [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
- [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
- [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
- [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)[text](../projet_04/.gitignore)
 
1
  # projet_05
2
 
3
+ ---
4
+ title: OCR_Projet05
5
+ emoji: 🔥
6
+ colorFrom: purple
7
+ colorTo: purple
8
+ sdk: gradio
9
+ sdk_version: 5.49.1
10
+ app_file: app.py
11
+ pinned: true
12
+ short_description: Projet 05 formation Openclassrooms
13
+ ---
14
+
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
 
69
  └── plots.py <- Code to create visualizations
70
  ```
71
 
72
+ ## Code hérité réutilisé
73
+
74
+ - `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
75
+ - `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
76
+
77
  --------
78
 
79
  ---
 
110
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
111
  -->
112
  [![Contributors][contributors-shield]][contributors-url]
113
+ [![Python][python]][python]
114
  [![Forks][forks-shield]][forks-url]
115
  [![Stargazers][stars-shield]][stars-url]
116
  [![Issues][issues-shield]][issues-url]
 
254
  - [ ] Feature 3
255
  - [ ] Nested Feature
256
 
257
+ See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
258
 
259
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
 
 
317
 
318
  <!-- MARKDOWN LINKS & IMAGES -->
319
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
320
+ [contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
321
+ [contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
322
+ [forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
323
+ [forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
324
+ [stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
325
+ [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
326
+ [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
327
+ [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
328
+ [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
329
+ [license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
330
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
331
+ [linkedin-url]: https://linkedin.com/in/stephanemanet
332
  [product-screenshot]: images/screenshot.png
333
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
334
  <!-- Shields.io badges. You can find a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
 
349
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
350
  [JQuery-url]: https://jquery.com
351
  <!-- TODO: -->
352
+ [Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
353
+ [Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff
354
+ [MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
355
+ [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
 
356
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ The MIT License (MIT)
3
+ Copyright (c) 2025, Stéphane Manet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #################################################################################
2
+ # GLOBALS #
3
+ #################################################################################
4
+
5
+ PROJECT_NAME = OCR_projet05
6
+ PYTHON_VERSION = 3.10
7
+ PYTHON_INTERPRETER = python
8
+
9
+ #################################################################################
10
+ # COMMANDS #
11
+ #################################################################################
12
+
13
+
14
+ ## Install Python dependencies
15
+ .PHONY: requirements
16
+ requirements:
17
+ pip install -e .
18
+
19
+
20
+
21
+
22
+ ## Delete all compiled Python files
23
+ .PHONY: clean
24
+ clean:
25
+ find . -type f -name "*.py[co]" -delete
26
+ find . -type d -name "__pycache__" -delete
27
+
28
+
29
+ ## Lint using ruff (use `make format` to do formatting)
30
+ .PHONY: lint
31
+ lint:
32
+ ruff format --check
33
+ ruff check
34
+
35
+ ## Format source code with ruff
36
+ .PHONY: format
37
+ format:
38
+ ruff check --fix
39
+ ruff format
40
+
41
+
42
+
43
+ ## Run tests
44
+ .PHONY: test
45
+ test:
46
+ python -m pytest tests
47
+
48
+
49
+ ## Set up Python interpreter environment
50
+ .PHONY: create_environment
51
+ create_environment:
52
+ @bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
53
+ @echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
54
+
55
+
56
+
57
+
58
+ #################################################################################
59
+ # PROJECT RULES #
60
+ #################################################################################
61
+
62
+
63
+ ## Make dataset
64
+ .PHONY: data
65
+ data: requirements
66
+ $(PYTHON_INTERPRETER) projet_05/dataset.py
67
+
68
+
69
+ #################################################################################
70
+ # Self Documenting Commands #
71
+ #################################################################################
72
+
73
+ .DEFAULT_GOAL := help
74
+
75
+ define PRINT_HELP_PYSCRIPT
76
+ import re, sys; \
77
+ lines = '\n'.join([line for line in sys.stdin]); \
78
+ matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
79
+ print('Available rules:\n'); \
80
+ print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
81
+ endef
82
+ export PRINT_HELP_PYSCRIPT
83
+
84
+ help:
85
+ @$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -1,7 +1,181 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
  import gradio as gr
8
+ import pandas as pd
9
+ from loguru import logger
10
+
11
+ from projet_05.branding import apply_brand_theme
12
+ from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
13
+
14
+ MODEL_PATH = Path("models/best_model.joblib")
15
+ METADATA_PATH = Path("models/best_model_meta.json")
16
+ SCHEMA_PATH = Path("data/processed/schema.json")
17
+
18
+
19
+ def _load_schema(path: Path) -> dict[str, Any]:
20
+ if not path.exists():
21
+ return {}
22
+ return json.loads(path.read_text(encoding="utf-8"))
23
+
24
+
25
+ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
26
+ if schema:
27
+ candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
28
+ if candidates:
29
+ return candidates
30
+ features = metadata.get("features", {})
31
+ explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
32
+ if explicit:
33
+ return explicit
34
+ if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
35
+ return list(pipeline.feature_names_in_)
36
+ return []
37
+
38
+
39
+ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
40
+ if isinstance(payload, pd.DataFrame):
41
+ df = payload.copy()
42
+ elif payload is None:
43
+ df = pd.DataFrame(columns=headers)
44
+ else:
45
+ df = pd.DataFrame(payload, columns=headers if headers else None)
46
+ df = df.dropna(how="all")
47
+ if df.empty:
48
+ raise gr.Error("Merci de saisir au moins une ligne complète.")
49
+ return df
50
+
51
+
52
+ def _ensure_model():
53
+ if PIPELINE is None:
54
+ raise gr.Error(
55
+ "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
56
+ )
57
+
58
+
59
+ def score_table(table):
60
+ _ensure_model()
61
+ df = _convert_input(table, FEATURE_ORDER)
62
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
63
+ return run_inference(
64
+ df,
65
+ PIPELINE,
66
+ THRESHOLD,
67
+ drop_columns=drop_cols,
68
+ required_features=FEATURE_ORDER or None,
69
+ )
70
+
71
+
72
+ def score_csv(upload):
73
+ _ensure_model()
74
+ if upload is None:
75
+ raise gr.Error("Veuillez déposer un fichier CSV.")
76
+ df = pd.read_csv(upload.name)
77
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
78
+ return run_inference(
79
+ df,
80
+ PIPELINE,
81
+ THRESHOLD,
82
+ drop_columns=drop_cols,
83
+ required_features=FEATURE_ORDER or None,
84
+ )
85
+
86
+
87
+ def predict_from_form(*values):
88
+ _ensure_model()
89
+ if not FEATURE_ORDER:
90
+ raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
91
+ payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
92
+ df = pd.DataFrame([payload])
93
+ scored = run_inference(
94
+ df,
95
+ PIPELINE,
96
+ THRESHOLD,
97
+ required_features=FEATURE_ORDER or None,
98
+ )
99
+ row = scored.iloc[0]
100
+ label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
101
+ return {
102
+ "probability": round(float(row["proba_depart"]), 4),
103
+ "decision": label,
104
+ "threshold": THRESHOLD,
105
+ }
106
+
107
+
108
+ # Chargement des artéfacts
109
+ apply_brand_theme()
110
+
111
+ PIPELINE = None
112
+ METADATA: dict[str, Any] = {}
113
+ THRESHOLD = 0.5
114
+ TARGET_COLUMN: str | None = None
115
+ SCHEMA = _load_schema(SCHEMA_PATH)
116
+
117
+ try:
118
+ PIPELINE = load_pipeline(MODEL_PATH)
119
+ METADATA = load_metadata(METADATA_PATH)
120
+ THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
121
+ TARGET_COLUMN = METADATA.get("target")
122
+ except FileNotFoundError as exc:
123
+ logger.warning("Artéfact manquant: {}", exc)
124
+
125
+ FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
126
+
127
+ with gr.Blocks(title="Prédicteur d'attrition") as demo:
128
+ gr.Markdown("# API Gradio – Prédiction de départ employé")
129
+ gr.Markdown(
130
+ "Le modèle applique le pipeline entraîné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
131
+ )
132
+
133
+ if PIPELINE is None:
134
+ gr.Markdown(
135
+ "⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
136
+ )
137
+ else:
138
+ gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
139
+
140
+ with gr.Tab("Formulaire unitaire"):
141
+ if not FEATURE_ORDER:
142
+ gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
143
+ else:
144
+ form_inputs: list[gr.components.Component] = [] # type: ignore
145
+ for feature in FEATURE_ORDER:
146
+ form_inputs.append(
147
+ gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
148
+ )
149
+ form_output = gr.JSON(label="Résultat")
150
+ gr.Button("Prédire").click(
151
+ fn=predict_from_form,
152
+ inputs=form_inputs,
153
+ outputs=form_output,
154
+ )
155
+
156
+ with gr.Tab("Tableau interactif"):
157
+ table_input = gr.Dataframe(
158
+ headers=FEATURE_ORDER if FEATURE_ORDER else None,
159
+ row_count=(1, "dynamic"),
160
+ col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
161
+ type="pandas",
162
+ )
163
+ table_output = gr.Dataframe(label="Prédictions", type="pandas")
164
+ gr.Button("Scorer les lignes").click(
165
+ fn=score_table,
166
+ inputs=table_input,
167
+ outputs=table_output,
168
+ )
169
+
170
+ with gr.Tab("Fichier CSV"):
171
+ file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
172
+ file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
173
+ gr.Button("Scorer le fichier").click(
174
+ fn=score_csv,
175
+ inputs=file_input,
176
+ outputs=file_output,
177
+ )
178
 
 
 
179
 
180
+ if __name__ == "__main__":
181
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -1,10 +1,13 @@
1
- name: Déployer vers Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
 
 
 
8
  jobs:
9
  deploy:
10
  runs-on: ubuntu-latest
@@ -23,7 +26,7 @@ jobs:
23
  python -m pip install --upgrade pip
24
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
25
 
26
- - name: Push to Hugging Face Space
27
  env:
28
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
29
  run: |
@@ -33,5 +36,5 @@ jobs:
33
  rsync -av --exclude '.git' ./ hf_space/
34
  cd hf_space
35
  git add .
36
- git commit -m "🚀 Auto-deploy from GitHub Actions"
37
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
1
+ name: Deploy to Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
8
+ permissions:
9
+ contents: write
10
+
11
  jobs:
12
  deploy:
13
  runs-on: ubuntu-latest
 
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28
 
29
+ - name: Deploy to Hugging Face Space
30
  env:
31
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
32
  run: |
 
36
  rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
+ git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore CHANGED
@@ -1,2 +1,192 @@
 
 
 
 
 
1
  *.code-workspace
2
- .venv/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data
2
+ /data/
3
+
4
+ # Mac OS-specific storage files
5
+ .DS_Store
6
  *.code-workspace
7
+
8
+ # vim
9
+ *.swp
10
+ *.swo
11
+
12
+ ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
13
+
14
+ # Byte-compiled / optimized / DLL files
15
+ __pycache__/
16
+ *.py[cod]
17
+ *$py.class
18
+
19
+ # C extensions
20
+ *.so
21
+
22
+ # Distribution / packaging
23
+ .Python
24
+ build/
25
+ develop-eggs/
26
+ dist/
27
+ downloads/
28
+ eggs/
29
+ .eggs/
30
+ lib/
31
+ lib64/
32
+ parts/
33
+ sdist/
34
+ var/
35
+ wheels/
36
+ share/python-wheels/
37
+ *.egg-info/
38
+ .installed.cfg
39
+ *.egg
40
+ MANIFEST
41
+
42
+ # PyInstaller
43
+ # Usually these files are written by a python script from a template
44
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
45
+ *.manifest
46
+ *.spec
47
+
48
+ # Installer logs
49
+ pip-log.txt
50
+ pip-delete-this-directory.txt
51
+
52
+ # Unit test / coverage reports
53
+ htmlcov/
54
+ .tox/
55
+ .nox/
56
+ .coverage
57
+ .coverage.*
58
+ .cache
59
+ nosetests.xml
60
+ coverage.xml
61
+ *.cover
62
+ *.py,cover
63
+ .hypothesis/
64
+ .pytest_cache/
65
+ cover/
66
+
67
+ # Translations
68
+ *.mo
69
+ *.pot
70
+
71
+ # Django stuff:
72
+ *.log
73
+ local_settings.py
74
+ db.sqlite3
75
+ db.sqlite3-journal
76
+
77
+ # Flask stuff:
78
+ instance/
79
+ .webassets-cache
80
+
81
+ # Scrapy stuff:
82
+ .scrapy
83
+
84
+ # MkDocs documentation
85
+ docs/site/
86
+
87
+ # PyBuilder
88
+ .pybuilder/
89
+ target/
90
+
91
+ # Jupyter Notebook
92
+ .ipynb_checkpoints
93
+
94
+ # IPython
95
+ profile_default/
96
+ ipython_config.py
97
+
98
+ # pyenv
99
+ # For a library or package, you might want to ignore these files since the code is
100
+ # intended to run in multiple environments; otherwise, check them in:
101
+ # .python-version
102
+
103
+ # pipenv
104
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
105
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
106
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
107
+ # install all needed dependencies.
108
+ #Pipfile.lock
109
+
110
+ # UV
111
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
112
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
113
+ # commonly ignored for libraries.
114
+ #uv.lock
115
+
116
+ # poetry
117
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
118
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
119
+ # commonly ignored for libraries.
120
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
121
+ #poetry.lock
122
+
123
+ # pdm
124
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
125
+ #pdm.lock
126
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
127
+ # in version control.
128
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
129
+ .pdm.toml
130
+ .pdm-python
131
+ .pdm-build/
132
+
133
+ # pixi
134
+ # pixi.lock should be committed to version control for reproducibility
135
+ # .pixi/ contains the environments and should not be committed
136
+ .pixi/
137
+
138
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
139
+ __pypackages__/
140
+
141
+ # Celery stuff
142
+ celerybeat-schedule
143
+ celerybeat.pid
144
+
145
+ # SageMath parsed files
146
+ *.sage.py
147
+
148
+ # Environments
149
+ .env
150
+ .venv
151
+ env/
152
+ venv/
153
+ ENV/
154
+ env.bak/
155
+ venv.bak/
156
+
157
+ # Spyder project settings
158
+ .spyderproject
159
+ .spyproject
160
+
161
+ # Rope project settings
162
+ .ropeproject
163
+
164
+ # mkdocs documentation
165
+ /site
166
+
167
+ # mypy
168
+ .mypy_cache/
169
+ .dmypy.json
170
+ dmypy.json
171
+
172
+ # Pyre type checker
173
+ .pyre/
174
+
175
+ # pytype static type analyzer
176
+ .pytype/
177
+
178
+ # Cython debug symbols
179
+ cython_debug/
180
+
181
+ # PyCharm
182
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
183
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
184
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
185
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
186
+ #.idea/
187
+
188
+ # Ruff stuff:
189
+ .ruff_cache/
190
+
191
+ # PyPI configuration file
192
+ .pypirc
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,3 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Projet 05
3
  emoji: 👀
@@ -10,3 +71,270 @@ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # projet_05
2
+
3
+ <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
+ <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
+ </a>
6
+
7
+ Déployez un modèle de Machine Learning
8
+
9
+ ## Organisation du projet
10
+
11
+ ```
12
+ ├── LICENSE <- Open-source license if one is chosen
13
+ ├── Makefile <- Makefile with convenience commands like `make data` or `make train`
14
+ ├── README.md <- The top-level README for developers using this project.
15
+ ├── data
16
+ │ ├── external <- Data from third party sources.
17
+ │ ├── interim <- Intermediate data that has been transformed.
18
+ │ ├── processed <- The final, canonical data sets for modeling.
19
+ │ └── raw <- The original, immutable data dump.
20
+
21
+ ├── docs <- A default mkdocs project; see www.mkdocs.org for details
22
+
23
+ ├── models <- Trained and serialized models, model predictions, or model summaries
24
+
25
+ ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
26
+ │ the creator's initials, and a short `-` delimited description, e.g.
27
+ │ `1.0-jqp-initial-data-exploration`.
28
+
29
+ ├── pyproject.toml <- Project configuration file with package metadata for
30
+ │ projet_05 and configuration for tools like black
31
+
32
+ ├── references <- Data dictionaries, manuals, and all other explanatory materials.
33
+
34
+ ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
35
+ │ └── figures <- Generated graphics and figures to be used in reporting
36
+
37
+ ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
38
+ │ generated with `pip freeze > requirements.txt`
39
+
40
+ ├── setup.cfg <- Configuration file for flake8
41
+
42
+ └── projet_05 <- Source code for use in this project.
43
+
44
+ ├── __init__.py <- Makes projet_05 a Python module
45
+
46
+ ├── config.py <- Store useful variables and configuration
47
+
48
+ ├── dataset.py <- Scripts to download or generate data
49
+
50
+ ├── features.py <- Code to create features for modeling
51
+
52
+ ├── modeling
53
+ │ ├── __init__.py
54
+ │ ├── predict.py <- Code to run model inference with trained models
55
+ │ └── train.py <- Code to train models
56
+
57
+ └── plots.py <- Code to create visualizations
58
+ ```
59
+
60
+ --------
61
+
62
  ---
63
  title: Projet 05
64
  emoji: 👀
 
71
  ---
72
 
73
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
74
+
75
+ <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
76
+ <a id="readme-top"></a>
77
+ <!--
78
+ *** Thanks for checking out the Best-README-Template. If you have a suggestion
79
+ *** that would make this better, please fork the repo and create a pull request
80
+ *** or simply open an issue with the tag "enhancement".
81
+ *** Don't forget to give the project a star!
82
+ *** Thanks again! Now go create something AMAZING! :D
83
+ -->
84
+
85
+
86
+
87
+ <!-- PROJECT SHIELDS -->
88
+ <!--
89
+ *** I'm using markdown "reference style" links for readability.
90
+ *** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
91
+ *** See the bottom of this document for the declaration of the reference variables
92
+ *** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
93
+ *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
+ -->
95
+ [![Contributors][contributors-shield]][contributors-url]
96
+ [![Forks][forks-shield]][forks-url]
97
+ [![Stargazers][stars-shield]][stars-url]
98
+ [![Issues][issues-shield]][issues-url]
99
+ [![project_license][license-shield]][license-url]
100
+ [![LinkedIn][linkedin-shield]][linkedin-url]
101
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
102
+
103
+
104
+
105
+ <!-- PROJECT LOGO -->
106
+ <br />
107
+ <div align="center">
108
+ <a href="https://github.com/github_username/repo_name">
109
+ <img src="images/logo.png" alt="Logo" width="80" height="80">
110
+ </a>
111
+
112
+ <h3 align="center">project_title</h3>
113
+
114
+ <p align="center">
115
+ project_description
116
+ <br />
117
+ <a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
118
+ <br />
119
+ <br />
120
+ <a href="https://github.com/github_username/repo_name">View Demo</a>
121
+ &middot;
122
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
123
+ &middot;
124
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
125
+ </p>
126
+ </div>
127
+
128
+
129
+
130
+ <!-- TABLE OF CONTENTS -->
131
+ <details>
132
+ <summary>Table of Contents</summary>
133
+ <ol>
134
+ <li>
135
+ <a href="#about-the-project">About The Project</a>
136
+ <ul>
137
+ <li><a href="#built-with">Built With</a></li>
138
+ </ul>
139
+ </li>
140
+ <li>
141
+ <a href="#getting-started">Getting Started</a>
142
+ <ul>
143
+ <li><a href="#prerequisites">Prerequisites</a></li>
144
+ <li><a href="#installation">Installation</a></li>
145
+ </ul>
146
+ </li>
147
+ <li><a href="#usage">Usage</a></li>
148
+ <li><a href="#roadmap">Roadmap</a></li>
149
+ <li><a href="#contributing">Contributing</a></li>
150
+ <li><a href="#license">License</a></li>
151
+ <li><a href="#contact">Contact</a></li>
152
+ <li><a href="#acknowledgments">Acknowledgments</a></li>
153
+ </ol>
154
+ </details>
155
+
156
+
157
+
158
+ <!-- ABOUT THE PROJECT -->
159
+ ## About The Project
160
+
161
+ [![Product Name Screen Shot][product-screenshot]](https://example.com)
162
+
163
+ Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
164
+
165
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
166
+
167
+
168
+
169
+ ### Built With
170
+
171
+ * [![Python][Python]][Python-url]
172
+ * [![SQL][SQL]][SQL-url]
173
+
174
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
175
+
176
+
177
+
178
+ <!-- GETTING STARTED -->
179
+ ## Getting Started
180
+
181
+ This is an example of how you may give instructions on setting up your project locally.
182
+ To get a local copy up and running follow these simple example steps.
183
+
184
+ ### Prerequisites
185
+
186
+ This is an example of how to list things you need to use the software and how to install them.
187
+ * npm
188
+ ```sh
189
+ npm install npm@latest -g
190
+ ```
191
+
192
+ ### Installation
193
+
194
+ pip install -r requirements.txt
195
+ uvicorn app.main:app --reload
196
+
197
+ 1. Get a free API Key at [https://example.com](https://example.com)
198
+ 2. Clone the repo
199
+ ```sh
200
+ git clone https://github.com/github_username/repo_name.git
201
+ ```
202
+ 3. Install NPM packages
203
+ ```sh
204
+ npm install
205
+ ```
206
+ 4. Enter your API in `config.js`
207
+ ```js
208
+ const API_KEY = 'ENTER YOUR API';
209
+ ```
210
+ 5. Change git remote url to avoid accidental pushes to base project
211
+ ```sh
212
+ git remote set-url origin github_username/repo_name
213
+ git remote -v # confirm the changes
214
+ ```
215
+
216
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
217
+
218
+
219
+
220
+ <!-- USAGE EXAMPLES -->
221
+ ## Usage
222
+
223
+ Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
224
+
225
+ _For more examples, please refer to the [Documentation](https://example.com)_
226
+
227
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
228
+
229
+
230
+
231
+ <!-- ROADMAP -->
232
+ ## Roadmap
233
+
234
+ - [ ] Feature 1
235
+ - [ ] Feature 2
236
+ - [ ] Feature 3
237
+ - [ ] Nested Feature
238
+
239
+ See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
+
241
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
+
243
+
244
+
245
+ <!-- CONTRIBUTING -->
246
+ ## Contributing
247
+
248
+ Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
249
+
250
+ If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
251
+ Don't forget to give the project a star! Thanks again!
252
+
253
+ 1. Fork the Project
254
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
255
+ 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
256
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
257
+ 5. Open a Pull Request
258
+
259
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
+
261
+ ### Top contributors:
262
+
263
+ <a href="https://github.com/github_username/repo_name/graphs/contributors">
264
+ <img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
265
+ </a>
266
+
267
+
268
+
269
+ <!-- LICENSE -->
270
+ ## License
271
+
272
+ Distributed under the project_license. See `LICENSE.txt` for more information.
273
+
274
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
275
+
276
+
277
+
278
+ <!-- CONTACT -->
279
+ ## Contact
280
+
281
+ Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
282
+
283
+ Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
284
+
285
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
286
+
287
+
288
+
289
+ <!-- ACKNOWLEDGMENTS -->
290
+ ## Acknowledgments
291
+
292
+ * []()
293
+ * []()
294
+ * []()
295
+
296
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
297
+
298
+
299
+
300
+ <!-- MARKDOWN LINKS & IMAGES -->
301
+ <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
+ [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
+ [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
+ [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
+ [forks-url]: https://github.com/github_username/repo_name/network/members
306
+ [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
+ [stars-url]: https://github.com/github_username/repo_name/stargazers
308
+ [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
+ [issues-url]: https://github.com/github_username/repo_name/issues
310
+ [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
+ [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
+ [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
+ [linkedin-url]: https://linkedin.com/in/linkedin_username
314
+ [product-screenshot]: images/screenshot.png
315
+ [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
+ <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
317
+ [Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
318
+ [Next-url]: https://nextjs.org/
319
+ [React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
320
+ [React-url]: https://reactjs.org/
321
+ [Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
322
+ [Vue-url]: https://vuejs.org/
323
+ [Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
324
+ [Angular-url]: https://angular.io/
325
+ [Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
326
+ [Svelte-url]: https://svelte.dev/
327
+ [Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
328
+ [Laravel-url]: https://laravel.com
329
+ [Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
330
+ [Bootstrap-url]: https://getbootstrap.com
331
+ [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
+ [JQuery-url]: https://jquery.com
333
+ <!-- TODO: -->
334
+ [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
+ [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
+ [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
+ [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
+ [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
+ [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
+ [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)[text](../projet_04/.gitignore)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ def greet(name):
4
+ return "Hello " + name + "!!"
5
+
6
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Déployer vers Hugging Face Spaces
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout repository
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Setup Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.10"
20
+
21
+ - name: Install dependencies
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
25
+
26
+ - name: Push to Hugging Face Space
27
+ env:
28
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
29
+ run: |
30
+ git config --global user.email "actions@github.com"
31
+ git config --global user.name "GitHub Actions"
32
+ git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
33
+ rsync -av --exclude '.git' ./ hf_space/
34
+ cd hf_space
35
+ git add .
36
+ git commit -m "🚀 Auto-deploy from GitHub Actions"
37
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.code-workspace
2
+ .venv/
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Projet 05
3
+ emoji: 👀
4
+ colorFrom: indigo
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ def greet(name):
4
+ return "Hello " + name + "!!"
5
+
6
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from app.main import greet
3
+
4
+ def test_greet_returns_string():
5
+ """Vérifie que la fonction retourne bien une chaîne de caractères."""
6
+ result = greet("Alice")
7
+ assert isinstance(result, str), "Le résultat doit être une chaîne de caractères."
8
+
9
+ def test_greet_output_content():
10
+ """Vérifie que la fonction génère la phrase attendue."""
11
+ result = greet("Bob")
12
+ assert result == "Hello Bob!!", f"Résultat inattendu : {result}"
13
+
14
+ def test_greet_with_empty_string():
15
+ """Vérifie le comportement si l’entrée est vide."""
16
+ result = greet("")
17
+ assert result == "Hello !!", "Le résultat doit gérer les entrées vides."
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [virtualenvs]
2
+ in-project = true
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from projet_05 import config # noqa: F401
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from dotenv import load_dotenv
4
+ from loguru import logger
5
+
6
+ # Load environment variables from .env file if it exists
7
+ load_dotenv()
8
+
9
+ # Paths
10
+ PROJ_ROOT = Path(__file__).resolve().parents[1]
11
+ logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
12
+
13
+ DATA_DIR = PROJ_ROOT / "data"
14
+ RAW_DATA_DIR = DATA_DIR / "raw"
15
+ INTERIM_DATA_DIR = DATA_DIR / "interim"
16
+ PROCESSED_DATA_DIR = DATA_DIR / "processed"
17
+ EXTERNAL_DATA_DIR = DATA_DIR / "external"
18
+
19
+ MODELS_DIR = PROJ_ROOT / "models"
20
+
21
+ REPORTS_DIR = PROJ_ROOT / "reports"
22
+ FIGURES_DIR = REPORTS_DIR / "figures"
23
+
24
+ # If tqdm is installed, configure loguru with tqdm.write
25
+ # https://github.com/Delgan/loguru/issues/135
26
+ try:
27
+ from tqdm import tqdm
28
+
29
+ logger.remove(0)
30
+ logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
31
+ except ModuleNotFoundError:
32
+ pass
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ input_path: Path = RAW_DATA_DIR / "dataset.csv",
16
+ output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
17
+ # ----------------------------------------------
18
+ ):
19
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
+ logger.info("Processing dataset...")
21
+ for i in tqdm(range(10), total=10):
22
+ if i == 5:
23
+ logger.info("Something happened for iteration 5.")
24
+ logger.success("Processing dataset complete.")
25
+ # -----------------------------------------
26
+
27
+
28
+ if __name__ == "__main__":
29
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
16
+ output_path: Path = PROCESSED_DATA_DIR / "features.csv",
17
+ # -----------------------------------------
18
+ ):
19
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
+ logger.info("Generating features from dataset...")
21
+ for i in tqdm(range(10), total=10):
22
+ if i == 5:
23
+ logger.info("Something happened for iteration 5.")
24
+ logger.success("Features generation complete.")
25
+ # -----------------------------------------
26
+
27
+
28
+ if __name__ == "__main__":
29
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
16
+ model_path: Path = MODELS_DIR / "model.pkl",
17
+ predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
18
+ # -----------------------------------------
19
+ ):
20
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
+ logger.info("Performing inference for model...")
22
+ for i in tqdm(range(10), total=10):
23
+ if i == 5:
24
+ logger.info("Something happened for iteration 5.")
25
+ logger.success("Inference complete.")
26
+ # -----------------------------------------
27
+
28
+
29
+ if __name__ == "__main__":
30
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ features_path: Path = PROCESSED_DATA_DIR / "features.csv",
16
+ labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
17
+ model_path: Path = MODELS_DIR / "model.pkl",
18
+ # -----------------------------------------
19
+ ):
20
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
+ logger.info("Training some model...")
22
+ for i in tqdm(range(10), total=10):
23
+ if i == 5:
24
+ logger.info("Something happened for iteration 5.")
25
+ logger.success("Modeling training complete.")
26
+ # -----------------------------------------
27
+
28
+
29
+ if __name__ == "__main__":
30
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import FIGURES_DIR, PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
16
+ output_path: Path = FIGURES_DIR / "plot.png",
17
+ # -----------------------------------------
18
+ ):
19
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
+ logger.info("Generating plot from data...")
21
+ for i in tqdm(range(10), total=10):
22
+ if i == 5:
23
+ logger.info("Something happened for iteration 5.")
24
+ logger.success("Plot generation complete.")
25
+ # -----------------------------------------
26
+
27
+
28
+ if __name__ == "__main__":
29
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "projet_05"
7
+ version = "0.0.1"
8
+ description = "D\u00e9ployez un mod\u00e8le de Machine Learning"
9
+ authors = [
10
+ { name = "St\u00e9phane Manet" },
11
+ ]
12
+ license = { file = "LICENSE" }
13
+ readme = "README.md"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License"
17
+ ]
18
+ dependencies = [
19
+ "loguru",
20
+ "mkdocs",
21
+ "pip",
22
+ "pytest",
23
+ "python-dotenv",
24
+ "ruff",
25
+ "tqdm",
26
+ "typer",
27
+ "imbalanced-learn (>=0.14.0,<0.15.0)",
28
+ "scikit-learn (>=1.4.2,<2.0.0)",
29
+ "matplotlib (>=3.10.7,<4.0.0)",
30
+ "numpy (>=2.3.4,<3.0.0)",
31
+ "pandas (>=2.3.3,<3.0.0)",
32
+ "pyyaml (>=6.0.3,<7.0.0)",
33
+ "scipy (>=1.16.3,<2.0.0)",
34
+ "seaborn (>=0.13.2,<0.14.0)",
35
+ "shap (>=0.49.1,<0.50.0)",
36
+ "gradio (>=5.49.1,<6.0.0)",
37
+ "joblib (>=1.4.2,<2.0.0)"
38
+ ]
39
+
40
+ requires-python = ">=3.11,<3.13"
41
+
42
+
43
+ [tool.ruff]
44
+ line-length = 99
45
+ src = ["projet_05"]
46
+ include = ["pyproject.toml", "projet_05/**/*.py"]
47
+
48
+ [tool.ruff.lint]
49
+ extend-select = ["I"] # Add import sorting
50
+
51
+ [tool.ruff.lint.isort]
52
+ known-first-party = ["projet_05"]
53
+ force-sort-within-sections = true
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import pytest
2
+
3
+
4
+ def test_code_is_tested():
5
+ assert False
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py CHANGED
@@ -1 +1,4 @@
1
  from projet_05 import config # noqa: F401
 
 
 
 
1
  from projet_05 import config # noqa: F401
2
+ from projet_05.settings import Settings, load_settings # noqa: F401
3
+
4
+ __all__ = ["config", "Settings", "load_settings"]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+ from typing import Union
6
+
7
+ from scripts_projet04.brand.brand import ( # type: ignore[import-not-found]
8
+ Theme,
9
+ ThemeConfig,
10
+ configure_brand,
11
+ load_brand,
12
+ make_diverging_cmap,
13
+ )
14
+
15
+ ROOT_DIR = Path(__file__).resolve().parents[1]
16
+ DEFAULT_BRAND_PATH = ROOT_DIR / "scripts_projet04" / "brand" / "brand.yml"
17
+
18
+
19
+ def _resolve_path(path: Union[str, Path, None]) -> Path:
20
+ if path is None:
21
+ return DEFAULT_BRAND_PATH
22
+ return Path(path).expanduser().resolve()
23
+
24
+
25
+ @lru_cache(maxsize=1)
26
+ def load_brand_config(path: Union[str, Path, None] = None) -> ThemeConfig:
27
+ """Load the brand YAML once and return the parsed ThemeConfig."""
28
+ cfg_path = _resolve_path(path)
29
+ return load_brand(cfg_path)
30
+
31
+
32
+ @lru_cache(maxsize=1)
33
+ def apply_brand_theme(path: Union[str, Path, None] = None) -> ThemeConfig:
34
+ """
35
+ Apply the OpenClassrooms/TechNova brand theme globally.
36
+
37
+ Returns the ThemeConfig so callers can inspect colors if needed.
38
+ """
39
+ cfg_path = _resolve_path(path)
40
+ cfg = configure_brand(cfg_path)
41
+ Theme.apply()
42
+ return cfg
43
+
44
+
45
+ __all__ = [
46
+ "Theme",
47
+ "ThemeConfig",
48
+ "apply_brand_theme",
49
+ "load_brand_config",
50
+ "make_diverging_cmap",
51
+ "DEFAULT_BRAND_PATH",
52
+ ]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py CHANGED
@@ -1,28 +1,202 @@
 
 
 
1
  from pathlib import Path
2
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
- from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
10
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- input_path: Path = RAW_DATA_DIR / "dataset.csv",
16
- output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
17
- # ----------------------------------------------
 
 
 
 
 
 
 
 
18
  ):
19
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
- logger.info("Processing dataset...")
21
- for i in tqdm(range(10), total=10):
22
- if i == 5:
23
- logger.info("Something happened for iteration 5.")
24
- logger.success("Processing dataset complete.")
25
- # -----------------------------------------
26
 
27
 
28
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
+ import pandas as pd
8
  from loguru import logger
 
9
  import typer
10
 
11
+ from projet_05.config import INTERIM_DATA_DIR
12
+ from projet_05.settings import Settings, load_settings
13
+
14
+ app = typer.Typer(help="Préparation et fusion des données sources.")
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Utilitaires
19
+ # ---------------------------------------------------------------------------
20
+ def safe_read_csv(path: Path, *, dtype=None) -> pd.DataFrame:
21
+ """Read a CSV file and return an empty frame when it fails."""
22
+ try:
23
+ logger.info("Lecture du fichier {}", path)
24
+ return pd.read_csv(path, dtype=dtype)
25
+ except FileNotFoundError:
26
+ logger.warning("Fichier absent: {}", path)
27
+ return pd.DataFrame()
28
+ except Exception as exc: # pragma: no cover - log + empty dataframe
29
+ logger.error("Impossible de lire {} ({})", path, exc)
30
+ return pd.DataFrame()
31
+
32
+
33
+ def clean_text_values(df: pd.DataFrame) -> pd.DataFrame:
34
+ """Normalize textual values that often materialize missing values."""
35
+ replace_tokens = [
36
+ "",
37
+ " ",
38
+ " ",
39
+ " ",
40
+ "nan",
41
+ "NaN",
42
+ "NAN",
43
+ "None",
44
+ "JE ne sais pas",
45
+ "je ne sais pas",
46
+ "Je ne sais pas",
47
+ "Unknow",
48
+ "Unknown",
49
+ "non pertinent",
50
+ "Non pertinent",
51
+ "NON PERTINENT",
52
+ ]
53
+ normalized = df.copy()
54
+ normalized = normalized.replace(replace_tokens, np.nan)
55
+
56
+ for column in normalized.select_dtypes(include="object"):
57
+ normalized[column] = (
58
+ normalized[column].replace(replace_tokens, np.nan).astype("string").str.strip()
59
+ )
60
+ return normalized
61
+
62
+
63
+ def _harmonize_id_column(df: pd.DataFrame, column: str, *, digits_only: bool = True) -> pd.DataFrame:
64
+ data = df.copy()
65
+ if column not in data.columns:
66
+ return data
67
+
68
+ if digits_only:
69
+ extracted = data[column].astype(str).str.extract(r"(\\d+)")
70
+ data[column] = pd.to_numeric(extracted[0], errors="coerce")
71
+ data[column] = pd.to_numeric(data[column], errors="coerce").astype("Int64")
72
+ return data
73
+
74
+
75
+ def _rename_column(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame:
76
+ if source not in df.columns:
77
+ return df
78
+ return df.rename(columns={source: target})
79
+
80
+
81
+ def _log_id_diagnostics(df: pd.DataFrame, *, name: str, col_id: str) -> None:
82
+ if col_id not in df.columns:
83
+ logger.warning("La colonne {} est absente du fichier {}.", col_id, name)
84
+ return
85
+ total = len(df)
86
+ uniques = df[col_id].nunique(dropna=True)
87
+ duplicates = total - uniques
88
+ logger.info(
89
+ "{name}: {total} lignes | {uniques} identifiants uniques | {duplicates} doublons",
90
+ name=name,
91
+ total=total,
92
+ uniques=uniques,
93
+ duplicates=duplicates,
94
+ )
95
+
96
+
97
+ def _persist_sql_trace(df_dict: dict[str, pd.DataFrame], settings: Settings) -> pd.DataFrame:
98
+ """
99
+ Reproduire la fusion SQL décrite dans le notebook.
100
 
101
+ Chaque DataFrame est stocké dans une base SQLite éphémère pour
102
+ conserver une traçabilité de la requête exécutée.
103
+ """
104
+ db_path = settings.db_file
105
+ sql_path = settings.sql_file
106
 
107
+ db_path.parent.mkdir(parents=True, exist_ok=True)
108
+ sql_path.parent.mkdir(parents=True, exist_ok=True)
109
 
110
+ if db_path.exists():
111
+ db_path.unlink()
112
+
113
+ query = f"""
114
+ SELECT *
115
+ FROM sirh
116
+ INNER JOIN evaluation USING ({settings.col_id})
117
+ INNER JOIN sond USING ({settings.col_id});
118
+ """.strip()
119
+
120
+ with db_path.open("wb") as _:
121
+ pass # just ensure the file exists for sqlite on some platforms
122
+
123
+ with sqlite3.connect(db_path) as conn:
124
+ for name, frame in df_dict.items():
125
+ frame.to_sql(name, conn, index=False, if_exists="replace")
126
+ merged = pd.read_sql_query(query, conn)
127
+
128
+ sql_path.write_text(query, encoding="utf-8")
129
+ return merged
130
+
131
+
132
+ def build_dataset(settings: Settings) -> pd.DataFrame:
133
+ """Load, clean, harmonize and merge the three raw sources."""
134
+ sirh = clean_text_values(
135
+ safe_read_csv(settings.path_sirh).pipe(
136
+ _harmonize_id_column, settings.col_id, digits_only=True
137
+ )
138
+ )
139
+ evaluation = clean_text_values(
140
+ safe_read_csv(settings.path_eval)
141
+ .pipe(_rename_column, "eval_number", settings.col_id)
142
+ .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
143
+ )
144
+ sond = clean_text_values(
145
+ safe_read_csv(settings.path_sondage)
146
+ .pipe(_rename_column, "code_sondage", settings.col_id)
147
+ .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
148
+ )
149
+
150
+ for name, frame in {"sirh": sirh, "evaluation": evaluation, "sond": sond}.items():
151
+ _log_id_diagnostics(frame, name=name, col_id=settings.col_id)
152
+
153
+ frames = {
154
+ "sirh": sirh,
155
+ "evaluation": evaluation,
156
+ "sond": sond,
157
+ }
158
+ merged = _persist_sql_trace(frames, settings)
159
+
160
+ missing_cols = [settings.col_id] if settings.col_id not in merged.columns else []
161
+ if missing_cols:
162
+ raise KeyError(
163
+ f"La colonne {settings.col_id} est absente de la fusion finale. "
164
+ "Vérifiez vos fichiers sources."
165
+ )
166
+
167
+ logger.success("Fusion réalisée: {} lignes / {} colonnes", *merged.shape)
168
+ return merged
169
+
170
+
171
+ def save_dataset(df: pd.DataFrame, output_path: Path) -> None:
172
+ output_path.parent.mkdir(parents=True, exist_ok=True)
173
+ df.to_csv(output_path, index=False)
174
+ logger.success("Fichier fusionné sauvegardé dans {}", output_path)
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # CLI
179
+ # ---------------------------------------------------------------------------
180
  @app.command()
181
  def main(
182
+ settings_path: Path = typer.Option(
183
+ None,
184
+ "--settings",
185
+ "-s",
186
+ help="Chemin vers un fichier settings.yml personnalisé.",
187
+ ),
188
+ output_path: Path = typer.Option(
189
+ INTERIM_DATA_DIR / "merged.csv",
190
+ "--output",
191
+ "-o",
192
+ help="Chemin de sortie du dataset fusionné.",
193
+ ),
194
  ):
195
+ """Entrypoint Typer pour reproduire la fusion des données brutes."""
196
+
197
+ settings = load_settings(settings_path) if settings_path else load_settings()
198
+ df = build_dataset(settings)
199
+ save_dataset(df, output_path)
 
 
200
 
201
 
202
  if __name__ == "__main__":
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/explainability.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Tuple
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from loguru import logger
9
+
10
+ from projet_05.branding import Theme, apply_brand_theme, make_diverging_cmap
11
+ from scripts_projet04.manet_projet04.shap_generator import ( # type: ignore[import-not-found]
12
+ shap_global,
13
+ shap_local,
14
+ )
15
+
16
+ apply_brand_theme()
17
+
18
+
19
+ def _shape_array(values) -> np.ndarray:
20
+ if hasattr(values, "values"):
21
+ arr = np.array(values.values)
22
+ else:
23
+ arr = np.array(values)
24
+ return np.nan_to_num(arr, copy=False)
25
+
26
+
27
def compute_shap_summary(
    pipeline,
    X: pd.DataFrame,
    y: pd.Series,
    *,
    max_samples: int = 500,
) -> Tuple[pd.DataFrame | None, object | None]:
    """Run the historical `shap_global` helper and derive a tabular summary.

    Returns
    -------
    summary_df : pd.DataFrame | None
        Mean absolute SHAP value per feature, sorted in descending order.
    shap_values : shap.Explanation | None
        Raw object returned by shap_global, reusable for later local analyses.
    """
    brand_cmap = make_diverging_cmap(Theme.PRIMARY, Theme.SECONDARY)
    shap_values, _, feature_names = shap_global(
        pipeline,
        X,
        y,
        sample_size=max_samples,
        cmap=brand_cmap,
    )
    # shap_global signals failure by returning Nones rather than raising.
    if shap_values is None or feature_names is None:
        logger.warning("Impossible de générer les résumés SHAP.")
        return None, None

    contributions = _shape_array(shap_values)
    if contributions.ndim == 1:
        # A flat vector (single feature) is promoted to a 2-D column.
        contributions = contributions.reshape(-1, 1)

    summary = pd.DataFrame(
        {
            "feature": list(feature_names),
            "mean_abs_shap": np.abs(contributions).mean(axis=0),
        }
    )
    summary = summary.sort_values("mean_abs_shap", ascending=False)
    return summary.reset_index(drop=True), shap_values
66
+
67
+
68
def save_shap_summary(summary: pd.DataFrame, output_path: Path) -> None:
    """Write the SHAP summary table to CSV, creating parent directories as needed."""
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    summary.to_csv(output_path, index=False)
    logger.info("Résumé SHAP sauvegardé dans {}", output_path)
72
+
73
+
74
def export_local_explanations(
    pipeline,
    shap_values,
    X: pd.DataFrame,
    custom_index: int | None = None,
) -> None:
    """Emit local SHAP plots for three default cases plus an optional custom row.

    Default cases: the row with the largest total absolute SHAP impact, the
    highest-probability row, and the lowest-probability row (per the historical
    trace). No-op when *shap_values* is None.
    """
    if shap_values is None:
        return

    contributions = _shape_array(shap_values)
    total_impact = np.abs(contributions).sum(axis=1)
    shap_local(int(np.argmax(total_impact)), shap_values)

    # Positive-class probabilities drive the high/low risk selections.
    probabilities = pipeline.predict_proba(X)[:, 1]
    shap_local(int(np.argmax(probabilities)), shap_values)
    shap_local(int(np.argmin(probabilities)), shap_values, text_scale=0.6)

    if custom_index is not None:
        shap_local(custom_index, shap_values, max_display=8)
100
+
101
+
102
# Explicit public surface of this explainability module.
__all__ = ["compute_shap_summary", "save_shap_summary", "export_local_explanations"]