GitHub Actions commited on
Commit
cdef3b2
·
1 Parent(s): 0496652

🚀 Auto-deploy from GitHub Actions

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +3 -0
  2. hf_space/.gitignore +5 -0
  3. hf_space/README.md +7 -27
  4. hf_space/docs/docs/greeter.md +3 -0
  5. hf_space/docs/docs/index.md +2 -7
  6. hf_space/docs/mkdocs.yml +19 -3
  7. hf_space/hf_space/hf_space/.github/workflows/deploy.yml +2 -2
  8. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +34 -18
  9. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE +10 -0
  10. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile +85 -0
  11. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +178 -4
  12. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/.gitkeep +0 -0
  13. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/README.md +12 -0
  14. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/docs/getting-started.md +6 -0
  15. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/docs/index.md +10 -0
  16. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/mkdocs.yml +4 -0
  17. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -4
  18. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +191 -1
  19. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +328 -0
  20. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +0 -0
  21. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +7 -0
  22. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +37 -0
  23. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
  24. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +2 -0
  25. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
  26. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +7 -0
  27. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py +17 -0
  28. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep +0 -0
  29. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  30. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock +0 -0
  31. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml +2 -0
  32. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +1 -0
  33. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py +32 -0
  34. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +29 -0
  35. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py +29 -0
  36. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py +0 -0
  37. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +30 -0
  38. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +30 -0
  39. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py +29 -0
  40. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +53 -0
  41. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep +0 -0
  42. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep +0 -0
  43. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep +0 -0
  44. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py +5 -0
  45. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  46. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +3 -0
  47. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py +52 -0
  48. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +188 -14
  49. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/explainability.py +102 -0
  50. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py +156 -14
.gitignore CHANGED
@@ -4,9 +4,12 @@
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
 
 
7
  questions.md
8
  *.pdf
9
 
 
10
  # vim
11
  *.swp
12
  *.swo
 
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
7
+ *.pdf
8
+ /output/
9
  questions.md
10
  *.pdf
11
 
12
+
13
  # vim
14
  *.swp
15
  *.swo
hf_space/.gitignore CHANGED
@@ -4,6 +4,11 @@
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
 
 
 
 
 
7
 
8
  # vim
9
  *.swp
 
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
7
+ *.pdf
8
+ /output/
9
+ questions.md
10
+ *.pdf
11
+
12
 
13
  # vim
14
  *.swp
hf_space/README.md CHANGED
@@ -1,5 +1,3 @@
1
- # projet_05
2
-
3
  ---
4
  title: OCR_Projet05
5
  emoji: 🔥
@@ -12,6 +10,8 @@ pinned: true
12
  short_description: Projet 05 formation Openclassrooms
13
  ---
14
 
 
 
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
@@ -76,17 +76,6 @@ Déployez un modèle de Machine Learning
76
 
77
  --------
78
 
79
- ---
80
- title: Projet 05
81
- emoji: 👀
82
- colorFrom: indigo
83
- colorTo: green
84
- sdk: gradio
85
- sdk_version: 5.49.1
86
- app_file: app.py
87
- pinned: false
88
- ---
89
-
90
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
91
 
92
  <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
@@ -99,8 +88,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
99
  *** Thanks again! Now go create something AMAZING! :D
100
  -->
101
 
102
-
103
-
104
  <!-- PROJECT SHIELDS -->
105
  <!--
106
  *** I'm using markdown "reference style" links for readability.
@@ -118,8 +105,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
118
  [![LinkedIn][linkedin-shield]][linkedin-url]
119
  ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
120
 
121
-
122
-
123
  <!-- PROJECT LOGO -->
124
  <br />
125
  <div align="center">
@@ -143,8 +128,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
143
  </p>
144
  </div>
145
 
146
-
147
-
148
  <!-- TABLE OF CONTENTS -->
149
  <details>
150
  <summary>Table of Contents</summary>
@@ -191,8 +174,6 @@ Here's a blank template to get started. To avoid retyping too much info, do a se
191
 
192
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
193
 
194
-
195
-
196
  <!-- GETTING STARTED -->
197
  ## Getting Started
198
 
@@ -212,20 +193,19 @@ This is an example of how to list things you need to use the software and how to
212
  pip install -r requirements.txt
213
  uvicorn app.main:app --reload
214
 
215
- 1. Get a free API Key at [https://example.com](https://example.com)
216
- 2. Clone the repo
217
  ```sh
218
- git clone https://github.com/github_username/repo_name.git
219
  ```
220
- 3. Install NPM packages
221
  ```sh
222
  npm install
223
  ```
224
- 4. Enter your API in `config.js`
225
  ```js
226
  const API_KEY = 'ENTER YOUR API';
227
  ```
228
- 5. Change git remote url to avoid accidental pushes to base project
229
  ```sh
230
  git remote set-url origin github_username/repo_name
231
  git remote -v # confirm the changes
 
 
 
1
  ---
2
  title: OCR_Projet05
3
  emoji: 🔥
 
10
  short_description: Projet 05 formation Openclassrooms
11
  ---
12
 
13
+ # projet_05
14
+
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
 
76
 
77
  --------
78
 
 
 
 
 
 
 
 
 
 
 
 
79
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
80
 
81
  <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
 
88
  *** Thanks again! Now go create something AMAZING! :D
89
  -->
90
 
 
 
91
  <!-- PROJECT SHIELDS -->
92
  <!--
93
  *** I'm using markdown "reference style" links for readability.
 
105
  [![LinkedIn][linkedin-shield]][linkedin-url]
106
  ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
107
 
 
 
108
  <!-- PROJECT LOGO -->
109
  <br />
110
  <div align="center">
 
128
  </p>
129
  </div>
130
 
 
 
131
  <!-- TABLE OF CONTENTS -->
132
  <details>
133
  <summary>Table of Contents</summary>
 
174
 
175
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
176
 
 
 
177
  <!-- GETTING STARTED -->
178
  ## Getting Started
179
 
 
193
  pip install -r requirements.txt
194
  uvicorn app.main:app --reload
195
 
196
+ 1. Clone the repo
 
197
  ```sh
198
+ git clone https://github.com/stephmnt/OCR_Projet05.git
199
  ```
200
+ 2. Install NPM packages
201
  ```sh
202
  npm install
203
  ```
204
+ 3. Enter your API in `config.js`
205
  ```js
206
  const API_KEY = 'ENTER YOUR API';
207
  ```
208
+ 4. Change git remote url to avoid accidental pushes to base project
209
  ```sh
210
  git remote set-url origin github_username/repo_name
211
  git remote -v # confirm the changes
hf_space/docs/docs/greeter.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Exemple de classe Greeter
2
+
3
+ ::: references.test.Greeter
hf_space/docs/docs/index.md CHANGED
@@ -1,10 +1,5 @@
1
- # projet_05 documentation!
2
 
3
  ## Description
4
 
5
- Déployez un modèle de Machine Learning
6
-
7
- ## Commands
8
-
9
- The Makefile contains the central entry points for common tasks related to this project.
10
-
 
1
+ # Déployez un modèle de Machine Learning
2
 
3
  ## Description
4
 
5
+ Cette documentation présente la réalisation du projet 05 du master Data scientist Machine Learning
 
 
 
 
 
hf_space/docs/mkdocs.yml CHANGED
@@ -1,4 +1,20 @@
1
- site_name: projet_05
2
- #
3
  site_author: Stéphane Manet
4
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ site_name: Documentation du projet
 
2
  site_author: Stéphane Manet
3
+ theme:
4
+ name: mkdocs
5
+
6
+ plugins:
7
+ - search
8
+ - mkdocstrings:
9
+ handlers:
10
+ python:
11
+ options:
12
+ show_source: true
13
+ docstring_style: google
14
+ merge_init_into_class: true
15
+
16
+ nav:
17
+ - Accueil: index.md
18
+ - Guide de démarrage: getting-started.md
19
+ - Référence API:
20
+ - Greeter: greeter.md
hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -33,8 +33,8 @@ jobs:
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
- rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
+ rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,5 +1,17 @@
1
  # projet_05
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
  </a>
@@ -57,6 +69,11 @@ Déployez un modèle de Machine Learning
57
  └── plots.py <- Code to create visualizations
58
  ```
59
 
 
 
 
 
 
60
  --------
61
 
62
  ---
@@ -93,6 +110,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
93
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
  -->
95
  [![Contributors][contributors-shield]][contributors-url]
 
96
  [![Forks][forks-shield]][forks-url]
97
  [![Stargazers][stars-shield]][stars-url]
98
  [![Issues][issues-shield]][issues-url]
@@ -236,7 +254,7 @@ _For more examples, please refer to the [Documentation](https://example.com)_
236
  - [ ] Feature 3
237
  - [ ] Nested Feature
238
 
239
- See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
 
241
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
 
@@ -299,18 +317,18 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
299
 
300
  <!-- MARKDOWN LINKS & IMAGES -->
301
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
- [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
- [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
- [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
- [forks-url]: https://github.com/github_username/repo_name/network/members
306
- [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
- [stars-url]: https://github.com/github_username/repo_name/stargazers
308
- [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
- [issues-url]: https://github.com/github_username/repo_name/issues
310
- [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
- [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
- [linkedin-url]: https://linkedin.com/in/linkedin_username
314
  [product-screenshot]: images/screenshot.png
315
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
@@ -331,10 +349,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
331
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
  [JQuery-url]: https://jquery.com
333
  <!-- TODO: -->
334
- [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
- [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
- [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
- [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
- [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
- [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)[text](../projet_04/.gitignore)
 
1
  # projet_05
2
 
3
+ ---
4
+ title: OCR_Projet05
5
+ emoji: 🔥
6
+ colorFrom: purple
7
+ colorTo: purple
8
+ sdk: gradio
9
+ sdk_version: 5.49.1
10
+ app_file: app.py
11
+ pinned: true
12
+ short_description: Projet 05 formation Openclassrooms
13
+ ---
14
+
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
 
69
  └── plots.py <- Code to create visualizations
70
  ```
71
 
72
+ ## Code hérité réutilisé
73
+
74
+ - `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
75
+ - `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
76
+
77
  --------
78
 
79
  ---
 
110
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
111
  -->
112
  [![Contributors][contributors-shield]][contributors-url]
113
+ [![Python][python]][python]
114
  [![Forks][forks-shield]][forks-url]
115
  [![Stargazers][stars-shield]][stars-url]
116
  [![Issues][issues-shield]][issues-url]
 
254
  - [ ] Feature 3
255
  - [ ] Nested Feature
256
 
257
+ See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
258
 
259
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
 
 
317
 
318
  <!-- MARKDOWN LINKS & IMAGES -->
319
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
320
+ [contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
321
+ [contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
322
+ [forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
323
+ [forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
324
+ [stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
325
+ [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
326
+ [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
327
+ [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
328
+ [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
329
+ [license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
330
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
331
+ [linkedin-url]: https://linkedin.com/in/stephanemanet
332
  [product-screenshot]: images/screenshot.png
333
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
334
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
 
349
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
350
  [JQuery-url]: https://jquery.com
351
  <!-- TODO: -->
352
+ [Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
353
+ [Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
354
+ [MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
355
+ [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
 
356
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ The MIT License (MIT)
3
+ Copyright (c) 2025, Stéphane Manet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #################################################################################
2
+ # GLOBALS #
3
+ #################################################################################
4
+
5
+ PROJECT_NAME = OCR_projet05
6
+ PYTHON_VERSION = 3.10
7
+ PYTHON_INTERPRETER = python
8
+
9
+ #################################################################################
10
+ # COMMANDS #
11
+ #################################################################################
12
+
13
+
14
+ ## Install Python dependencies
15
+ .PHONY: requirements
16
+ requirements:
17
+ pip install -e .
18
+
19
+
20
+
21
+
22
+ ## Delete all compiled Python files
23
+ .PHONY: clean
24
+ clean:
25
+ find . -type f -name "*.py[co]" -delete
26
+ find . -type d -name "__pycache__" -delete
27
+
28
+
29
+ ## Lint using ruff (use `make format` to do formatting)
30
+ .PHONY: lint
31
+ lint:
32
+ ruff format --check
33
+ ruff check
34
+
35
+ ## Format source code with ruff
36
+ .PHONY: format
37
+ format:
38
+ ruff check --fix
39
+ ruff format
40
+
41
+
42
+
43
+ ## Run tests
44
+ .PHONY: test
45
+ test:
46
+ python -m pytest tests
47
+
48
+
49
+ ## Set up Python interpreter environment
50
+ .PHONY: create_environment
51
+ create_environment:
52
+ @bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
53
+ @echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
54
+
55
+
56
+
57
+
58
+ #################################################################################
59
+ # PROJECT RULES #
60
+ #################################################################################
61
+
62
+
63
+ ## Make dataset
64
+ .PHONY: data
65
+ data: requirements
66
+ $(PYTHON_INTERPRETER) projet_05/dataset.py
67
+
68
+
69
+ #################################################################################
70
+ # Self Documenting Commands #
71
+ #################################################################################
72
+
73
+ .DEFAULT_GOAL := help
74
+
75
+ define PRINT_HELP_PYSCRIPT
76
+ import re, sys; \
77
+ lines = '\n'.join([line for line in sys.stdin]); \
78
+ matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
79
+ print('Available rules:\n'); \
80
+ print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
81
+ endef
82
+ export PRINT_HELP_PYSCRIPT
83
+
84
+ help:
85
+ @$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -1,7 +1,181 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
  import gradio as gr
8
+ import pandas as pd
9
+ from loguru import logger
10
+
11
+ from projet_05.branding import apply_brand_theme
12
+ from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
13
+
14
+ MODEL_PATH = Path("models/best_model.joblib")
15
+ METADATA_PATH = Path("models/best_model_meta.json")
16
+ SCHEMA_PATH = Path("data/processed/schema.json")
17
+
18
+
19
+ def _load_schema(path: Path) -> dict[str, Any]:
20
+ if not path.exists():
21
+ return {}
22
+ return json.loads(path.read_text(encoding="utf-8"))
23
+
24
+
25
+ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
26
+ if schema:
27
+ candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
28
+ if candidates:
29
+ return candidates
30
+ features = metadata.get("features", {})
31
+ explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
32
+ if explicit:
33
+ return explicit
34
+ if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
35
+ return list(pipeline.feature_names_in_)
36
+ return []
37
+
38
+
39
+ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
40
+ if isinstance(payload, pd.DataFrame):
41
+ df = payload.copy()
42
+ elif payload is None:
43
+ df = pd.DataFrame(columns=headers)
44
+ else:
45
+ df = pd.DataFrame(payload, columns=headers if headers else None)
46
+ df = df.dropna(how="all")
47
+ if df.empty:
48
+ raise gr.Error("Merci de saisir au moins une ligne complète.")
49
+ return df
50
+
51
+
52
+ def _ensure_model():
53
+ if PIPELINE is None:
54
+ raise gr.Error(
55
+ "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
56
+ )
57
+
58
+
59
+ def score_table(table):
60
+ _ensure_model()
61
+ df = _convert_input(table, FEATURE_ORDER)
62
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
63
+ return run_inference(
64
+ df,
65
+ PIPELINE,
66
+ THRESHOLD,
67
+ drop_columns=drop_cols,
68
+ required_features=FEATURE_ORDER or None,
69
+ )
70
+
71
+
72
+ def score_csv(upload):
73
+ _ensure_model()
74
+ if upload is None:
75
+ raise gr.Error("Veuillez déposer un fichier CSV.")
76
+ df = pd.read_csv(upload.name)
77
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
78
+ return run_inference(
79
+ df,
80
+ PIPELINE,
81
+ THRESHOLD,
82
+ drop_columns=drop_cols,
83
+ required_features=FEATURE_ORDER or None,
84
+ )
85
+
86
+
87
+ def predict_from_form(*values):
88
+ _ensure_model()
89
+ if not FEATURE_ORDER:
90
+ raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
91
+ payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
92
+ df = pd.DataFrame([payload])
93
+ scored = run_inference(
94
+ df,
95
+ PIPELINE,
96
+ THRESHOLD,
97
+ required_features=FEATURE_ORDER or None,
98
+ )
99
+ row = scored.iloc[0]
100
+ label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
101
+ return {
102
+ "probability": round(float(row["proba_depart"]), 4),
103
+ "decision": label,
104
+ "threshold": THRESHOLD,
105
+ }
106
+
107
+
108
+ # Chargement des artéfacts
109
+ apply_brand_theme()
110
+
111
+ PIPELINE = None
112
+ METADATA: dict[str, Any] = {}
113
+ THRESHOLD = 0.5
114
+ TARGET_COLUMN: str | None = None
115
+ SCHEMA = _load_schema(SCHEMA_PATH)
116
+
117
+ try:
118
+ PIPELINE = load_pipeline(MODEL_PATH)
119
+ METADATA = load_metadata(METADATA_PATH)
120
+ THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
121
+ TARGET_COLUMN = METADATA.get("target")
122
+ except FileNotFoundError as exc:
123
+ logger.warning("Artéfact manquant: {}", exc)
124
+
125
+ FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
126
+
127
+ with gr.Blocks(title="Prédicteur d'attrition") as demo:
128
+ gr.Markdown("# API Gradio – Prédiction de départ employé")
129
+ gr.Markdown(
130
+ "Le modèle applique le pipeline entraîné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
131
+ )
132
+
133
+ if PIPELINE is None:
134
+ gr.Markdown(
135
+ "⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
136
+ )
137
+ else:
138
+ gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
139
+
140
+ with gr.Tab("Formulaire unitaire"):
141
+ if not FEATURE_ORDER:
142
+ gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
143
+ else:
144
+ form_inputs: list[gr.components.Component] = [] # type: ignore
145
+ for feature in FEATURE_ORDER:
146
+ form_inputs.append(
147
+ gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
148
+ )
149
+ form_output = gr.JSON(label="Résultat")
150
+ gr.Button("Prédire").click(
151
+ fn=predict_from_form,
152
+ inputs=form_inputs,
153
+ outputs=form_output,
154
+ )
155
+
156
+ with gr.Tab("Tableau interactif"):
157
+ table_input = gr.Dataframe(
158
+ headers=FEATURE_ORDER if FEATURE_ORDER else None,
159
+ row_count=(1, "dynamic"),
160
+ col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
161
+ type="pandas",
162
+ )
163
+ table_output = gr.Dataframe(label="Prédictions", type="pandas")
164
+ gr.Button("Scorer les lignes").click(
165
+ fn=score_table,
166
+ inputs=table_input,
167
+ outputs=table_output,
168
+ )
169
+
170
+ with gr.Tab("Fichier CSV"):
171
+ file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
172
+ file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
173
+ gr.Button("Scorer le fichier").click(
174
+ fn=score_csv,
175
+ inputs=file_input,
176
+ outputs=file_output,
177
+ )
178
 
 
 
179
 
180
+ if __name__ == "__main__":
181
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generating the docs
2
+ ----------
3
+
4
+ Use [mkdocs](http://www.mkdocs.org/) structure to update the documentation.
5
+
6
+ Build locally with:
7
+
8
+ mkdocs build
9
+
10
+ Serve locally with:
11
+
12
+ mkdocs serve
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/docs/getting-started.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Getting started
2
+ ===============
3
+
4
+ This is where you describe how to get set up on a clean install, including the
5
+ commands necessary to get the raw data (using the `sync_data_from_s3` command,
6
+ for example), and then how to make the cleaned, final data sets.
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/docs/index.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # projet_05 documentation!
2
+
3
+ ## Description
4
+
5
+ Déployez un modèle de Machine Learning
6
+
7
+ ## Commands
8
+
9
+ The Makefile contains the central entry points for common tasks related to this project.
10
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/docs/mkdocs.yml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ site_name: projet_05
2
+ #
3
+ site_author: Stéphane Manet
4
+ #
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -1,10 +1,13 @@
1
- name: Déployer vers Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
 
 
 
8
  jobs:
9
  deploy:
10
  runs-on: ubuntu-latest
@@ -23,7 +26,7 @@ jobs:
23
  python -m pip install --upgrade pip
24
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
25
 
26
- - name: Push to Hugging Face Space
27
  env:
28
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
29
  run: |
@@ -33,5 +36,5 @@ jobs:
33
  rsync -av --exclude '.git' ./ hf_space/
34
  cd hf_space
35
  git add .
36
- git commit -m "🚀 Auto-deploy from GitHub Actions"
37
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
1
+ name: Deploy to Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
8
+ permissions:
9
+ contents: write
10
+
11
  jobs:
12
  deploy:
13
  runs-on: ubuntu-latest
 
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28
 
29
+ - name: Deploy to Hugging Face Space
30
  env:
31
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
32
  run: |
 
36
  rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
+ git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore CHANGED
@@ -1,2 +1,192 @@
 
 
 
 
 
1
  *.code-workspace
2
- .venv/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data
2
+ /data/
3
+
4
+ # Mac OS-specific storage files
5
+ .DS_Store
6
  *.code-workspace
7
+
8
+ # vim
9
+ *.swp
10
+ *.swo
11
+
12
+ ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
13
+
14
+ # Byte-compiled / optimized / DLL files
15
+ __pycache__/
16
+ *.py[cod]
17
+ *$py.class
18
+
19
+ # C extensions
20
+ *.so
21
+
22
+ # Distribution / packaging
23
+ .Python
24
+ build/
25
+ develop-eggs/
26
+ dist/
27
+ downloads/
28
+ eggs/
29
+ .eggs/
30
+ lib/
31
+ lib64/
32
+ parts/
33
+ sdist/
34
+ var/
35
+ wheels/
36
+ share/python-wheels/
37
+ *.egg-info/
38
+ .installed.cfg
39
+ *.egg
40
+ MANIFEST
41
+
42
+ # PyInstaller
43
+ # Usually these files are written by a python script from a template
44
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
45
+ *.manifest
46
+ *.spec
47
+
48
+ # Installer logs
49
+ pip-log.txt
50
+ pip-delete-this-directory.txt
51
+
52
+ # Unit test / coverage reports
53
+ htmlcov/
54
+ .tox/
55
+ .nox/
56
+ .coverage
57
+ .coverage.*
58
+ .cache
59
+ nosetests.xml
60
+ coverage.xml
61
+ *.cover
62
+ *.py,cover
63
+ .hypothesis/
64
+ .pytest_cache/
65
+ cover/
66
+
67
+ # Translations
68
+ *.mo
69
+ *.pot
70
+
71
+ # Django stuff:
72
+ *.log
73
+ local_settings.py
74
+ db.sqlite3
75
+ db.sqlite3-journal
76
+
77
+ # Flask stuff:
78
+ instance/
79
+ .webassets-cache
80
+
81
+ # Scrapy stuff:
82
+ .scrapy
83
+
84
+ # MkDocs documentation
85
+ docs/site/
86
+
87
+ # PyBuilder
88
+ .pybuilder/
89
+ target/
90
+
91
+ # Jupyter Notebook
92
+ .ipynb_checkpoints
93
+
94
+ # IPython
95
+ profile_default/
96
+ ipython_config.py
97
+
98
+ # pyenv
99
+ # For a library or package, you might want to ignore these files since the code is
100
+ # intended to run in multiple environments; otherwise, check them in:
101
+ # .python-version
102
+
103
+ # pipenv
104
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
105
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
106
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
107
+ # install all needed dependencies.
108
+ #Pipfile.lock
109
+
110
+ # UV
111
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
112
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
113
+ # commonly ignored for libraries.
114
+ #uv.lock
115
+
116
+ # poetry
117
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
118
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
119
+ # commonly ignored for libraries.
120
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
121
+ #poetry.lock
122
+
123
+ # pdm
124
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
125
+ #pdm.lock
126
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
127
+ # in version control.
128
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
129
+ .pdm.toml
130
+ .pdm-python
131
+ .pdm-build/
132
+
133
+ # pixi
134
+ # pixi.lock should be committed to version control for reproducibility
135
+ # .pixi/ contains the environments and should not be committed
136
+ .pixi/
137
+
138
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
139
+ __pypackages__/
140
+
141
+ # Celery stuff
142
+ celerybeat-schedule
143
+ celerybeat.pid
144
+
145
+ # SageMath parsed files
146
+ *.sage.py
147
+
148
+ # Environments
149
+ .env
150
+ .venv
151
+ env/
152
+ venv/
153
+ ENV/
154
+ env.bak/
155
+ venv.bak/
156
+
157
+ # Spyder project settings
158
+ .spyderproject
159
+ .spyproject
160
+
161
+ # Rope project settings
162
+ .ropeproject
163
+
164
+ # mkdocs documentation
165
+ /site
166
+
167
+ # mypy
168
+ .mypy_cache/
169
+ .dmypy.json
170
+ dmypy.json
171
+
172
+ # Pyre type checker
173
+ .pyre/
174
+
175
+ # pytype static type analyzer
176
+ .pytype/
177
+
178
+ # Cython debug symbols
179
+ cython_debug/
180
+
181
+ # PyCharm
182
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
183
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
184
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
185
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
186
+ #.idea/
187
+
188
+ # Ruff stuff:
189
+ .ruff_cache/
190
+
191
+ # PyPI configuration file
192
+ .pypirc
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,3 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Projet 05
3
  emoji: 👀
@@ -10,3 +71,270 @@ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # projet_05
2
+
3
+ <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
+ <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
+ </a>
6
+
7
+ Déployez un modèle de Machine Learning
8
+
9
+ ## Organisation du projet
10
+
11
+ ```
12
+ ├── LICENSE <- Open-source license if one is chosen
13
+ ├── Makefile <- Makefile with convenience commands like `make data` or `make train`
14
+ ├── README.md <- The top-level README for developers using this project.
15
+ ├── data
16
+ │ ├── external <- Data from third party sources.
17
+ │ ├── interim <- Intermediate data that has been transformed.
18
+ │ ├── processed <- The final, canonical data sets for modeling.
19
+ │ └── raw <- The original, immutable data dump.
20
+
21
+ ├── docs <- A default mkdocs project; see www.mkdocs.org for details
22
+
23
+ ├── models <- Trained and serialized models, model predictions, or model summaries
24
+
25
+ ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
26
+ │ the creator's initials, and a short `-` delimited description, e.g.
27
+ │ `1.0-jqp-initial-data-exploration`.
28
+
29
+ ├── pyproject.toml <- Project configuration file with package metadata for
30
+ │ projet_05 and configuration for tools like black
31
+
32
+ ├── references <- Data dictionaries, manuals, and all other explanatory materials.
33
+
34
+ ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
35
+ │ └── figures <- Generated graphics and figures to be used in reporting
36
+
37
+ ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
38
+ │ generated with `pip freeze > requirements.txt`
39
+
40
+ ├── setup.cfg <- Configuration file for flake8
41
+
42
+ └── projet_05 <- Source code for use in this project.
43
+
44
+ ├── __init__.py <- Makes projet_05 a Python module
45
+
46
+ ├── config.py <- Store useful variables and configuration
47
+
48
+ ├── dataset.py <- Scripts to download or generate data
49
+
50
+ ├── features.py <- Code to create features for modeling
51
+
52
+ ├── modeling
53
+ │ ├── __init__.py
54
+ │ ├── predict.py <- Code to run model inference with trained models
55
+ │ └── train.py <- Code to train models
56
+
57
+ └── plots.py <- Code to create visualizations
58
+ ```
59
+
60
+ --------
61
+
62
  ---
63
  title: Projet 05
64
  emoji: 👀
 
71
  ---
72
 
73
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
74
+
75
+ <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
76
+ <a id="readme-top"></a>
77
+ <!--
78
+ *** Thanks for checking out the Best-README-Template. If you have a suggestion
79
+ *** that would make this better, please fork the repo and create a pull request
80
+ *** or simply open an issue with the tag "enhancement".
81
+ *** Don't forget to give the project a star!
82
+ *** Thanks again! Now go create something AMAZING! :D
83
+ -->
84
+
85
+
86
+
87
+ <!-- PROJECT SHIELDS -->
88
+ <!--
89
+ *** I'm using markdown "reference style" links for readability.
90
+ *** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
91
+ *** See the bottom of this document for the declaration of the reference variables
92
+ *** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
93
+ *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
+ -->
95
+ [![Contributors][contributors-shield]][contributors-url]
96
+ [![Forks][forks-shield]][forks-url]
97
+ [![Stargazers][stars-shield]][stars-url]
98
+ [![Issues][issues-shield]][issues-url]
99
+ [![project_license][license-shield]][license-url]
100
+ [![LinkedIn][linkedin-shield]][linkedin-url]
101
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/stephmnt/projet_05/deploy.yml)
102
+
103
+
104
+
105
+ <!-- PROJECT LOGO -->
106
+ <br />
107
+ <div align="center">
108
+ <a href="https://github.com/github_username/repo_name">
109
+ <img src="images/logo.png" alt="Logo" width="80" height="80">
110
+ </a>
111
+
112
+ <h3 align="center">project_title</h3>
113
+
114
+ <p align="center">
115
+ project_description
116
+ <br />
117
+ <a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
118
+ <br />
119
+ <br />
120
+ <a href="https://github.com/github_username/repo_name">View Demo</a>
121
+ &middot;
122
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
123
+ &middot;
124
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
125
+ </p>
126
+ </div>
127
+
128
+
129
+
130
+ <!-- TABLE OF CONTENTS -->
131
+ <details>
132
+ <summary>Table of Contents</summary>
133
+ <ol>
134
+ <li>
135
+ <a href="#about-the-project">About The Project</a>
136
+ <ul>
137
+ <li><a href="#built-with">Built With</a></li>
138
+ </ul>
139
+ </li>
140
+ <li>
141
+ <a href="#getting-started">Getting Started</a>
142
+ <ul>
143
+ <li><a href="#prerequisites">Prerequisites</a></li>
144
+ <li><a href="#installation">Installation</a></li>
145
+ </ul>
146
+ </li>
147
+ <li><a href="#usage">Usage</a></li>
148
+ <li><a href="#roadmap">Roadmap</a></li>
149
+ <li><a href="#contributing">Contributing</a></li>
150
+ <li><a href="#license">License</a></li>
151
+ <li><a href="#contact">Contact</a></li>
152
+ <li><a href="#acknowledgments">Acknowledgments</a></li>
153
+ </ol>
154
+ </details>
155
+
156
+
157
+
158
+ <!-- ABOUT THE PROJECT -->
159
+ ## About The Project
160
+
161
+ [![Product Name Screen Shot][product-screenshot]](https://example.com)
162
+
163
+ Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
164
+
165
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
166
+
167
+
168
+
169
+ ### Built With
170
+
171
+ * [![Python][Python]][Python-url]
172
+ * [![SQL][SQL]][SQL-url]
173
+
174
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
175
+
176
+
177
+
178
+ <!-- GETTING STARTED -->
179
+ ## Getting Started
180
+
181
+ This is an example of how you may give instructions on setting up your project locally.
182
+ To get a local copy up and running follow these simple example steps.
183
+
184
+ ### Prerequisites
185
+
186
+ This is an example of how to list things you need to use the software and how to install them.
187
+ * npm
188
+ ```sh
189
+ npm install npm@latest -g
190
+ ```
191
+
192
+ ### Installation
193
+
194
+ pip install -r requirements.txt
195
+ uvicorn app.main:app --reload
196
+
197
+ 1. Get a free API Key at [https://example.com](https://example.com)
198
+ 2. Clone the repo
199
+ ```sh
200
+ git clone https://github.com/github_username/repo_name.git
201
+ ```
202
+ 3. Install NPM packages
203
+ ```sh
204
+ npm install
205
+ ```
206
+ 4. Enter your API in `config.js`
207
+ ```js
208
+ const API_KEY = 'ENTER YOUR API';
209
+ ```
210
+ 5. Change git remote url to avoid accidental pushes to base project
211
+ ```sh
212
+ git remote set-url origin github_username/repo_name
213
+ git remote -v # confirm the changes
214
+ ```
215
+
216
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
217
+
218
+
219
+
220
+ <!-- USAGE EXAMPLES -->
221
+ ## Usage
222
+
223
+ Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
224
+
225
+ _For more examples, please refer to the [Documentation](https://example.com)_
226
+
227
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
228
+
229
+
230
+
231
+ <!-- ROADMAP -->
232
+ ## Roadmap
233
+
234
+ - [ ] Feature 1
235
+ - [ ] Feature 2
236
+ - [ ] Feature 3
237
+ - [ ] Nested Feature
238
+
239
+ See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
+
241
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
+
243
+
244
+
245
+ <!-- CONTRIBUTING -->
246
+ ## Contributing
247
+
248
+ Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
249
+
250
+ If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
251
+ Don't forget to give the project a star! Thanks again!
252
+
253
+ 1. Fork the Project
254
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
255
+ 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
256
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
257
+ 5. Open a Pull Request
258
+
259
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
+
261
+ ### Top contributors:
262
+
263
+ <a href="https://github.com/github_username/repo_name/graphs/contributors">
264
+ <img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
265
+ </a>
266
+
267
+
268
+
269
+ <!-- LICENSE -->
270
+ ## License
271
+
272
+ Distributed under the project_license. See `LICENSE.txt` for more information.
273
+
274
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
275
+
276
+
277
+
278
+ <!-- CONTACT -->
279
+ ## Contact
280
+
281
+ Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
282
+
283
+ Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
284
+
285
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
286
+
287
+
288
+
289
+ <!-- ACKNOWLEDGMENTS -->
290
+ ## Acknowledgments
291
+
292
+ * []()
293
+ * []()
294
+ * []()
295
+
296
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
297
+
298
+
299
+
300
+ <!-- MARKDOWN LINKS & IMAGES -->
301
+ <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
+ [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
+ [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
+ [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
+ [forks-url]: https://github.com/github_username/repo_name/network/members
306
+ [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
+ [stars-url]: https://github.com/github_username/repo_name/stargazers
308
+ [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
+ [issues-url]: https://github.com/github_username/repo_name/issues
310
+ [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
+ [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
+ [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
+ [linkedin-url]: https://linkedin.com/in/linkedin_username
314
+ [product-screenshot]: images/screenshot.png
315
+ [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
+ <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
317
+ [Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
318
+ [Next-url]: https://nextjs.org/
319
+ [React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
320
+ [React-url]: https://reactjs.org/
321
+ [Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
322
+ [Vue-url]: https://vuejs.org/
323
+ [Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
324
+ [Angular-url]: https://angular.io/
325
+ [Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
326
+ [Svelte-url]: https://svelte.dev/
327
+ [Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
328
+ [Laravel-url]: https://laravel.com
329
+ [Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
330
+ [Bootstrap-url]: https://getbootstrap.com
331
+ [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
+ [JQuery-url]: https://jquery.com
333
+ <!-- TODO: -->
334
+ [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
+ [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
+ [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
+ [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
+ [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
+ [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
+ [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
import gradio as gr


def greet(name):
    """Return the greeting shown by the demo, e.g. "Hello Alice!!"."""
    return "Hello " + name + "!!"


# Simple text-in / text-out Gradio interface around `greet`.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")

# Launch only when executed as a script: importing this module (e.g. from
# tests/test_app.py, which does `from app.main import greet`) must not start
# the Gradio server.
if __name__ == "__main__":
    demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys the repository to the Hugging Face Space on every push to main.
name: Deploy to Hugging Face Spaces

on:
  push:
    branches:
      - main

# Checkout needs read access; the job only writes to the HF remote,
# but declaring minimal permissions is good hygiene.
permissions:
  contents: write

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Deploy to Hugging Face Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git config --global user.email "actions@github.com"
          git config --global user.name "GitHub Actions"
          git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
          rsync -av --exclude '.git' ./ hf_space/
          cd hf_space
          git add .
          # `git commit` exits non-zero when the rsync produced no changes;
          # don't fail the whole workflow in that case.
          git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
          git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.code-workspace
2
+ .venv/
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Projet 05
3
+ emoji: 👀
4
+ colorFrom: indigo
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
def greet(name):
    """Return the demo greeting for *name* (e.g. "Hello Alice!!")."""
    parts = ["Hello ", name, "!!"]
    return "".join(parts)
5
+
6
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from app.main import greet


def test_greet_returns_string():
    """The greeting must be a plain string."""
    greeting = greet("Alice")
    assert isinstance(greeting, str), "Le résultat doit être une chaîne de caractères."


def test_greet_output_content():
    """The greeting must match the expected sentence exactly."""
    result = greet("Bob")
    assert result == "Hello Bob!!", f"Résultat inattendu : {result}"


def test_greet_with_empty_string():
    """An empty name still produces a well-formed greeting."""
    result = greet("")
    assert result == "Hello !!", "Le résultat doit gérer les entrées vides."
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [virtualenvs]
2
+ in-project = true
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from projet_05 import config # noqa: F401
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Project-wide configuration for projet_05: canonical paths and logging setup.

Importing this module has two side effects, in order: environment variables
are loaded from a `.env` file (if present), and loguru is re-routed through
tqdm when tqdm is installed so progress bars and log lines don't clobber
each other.
"""

from pathlib import Path

from dotenv import load_dotenv
from loguru import logger

# Load environment variables from .env file if it exists
load_dotenv()

# Paths
# parents[1] is the repository root, since this file lives in <root>/projet_05/.
PROJ_ROOT = Path(__file__).resolve().parents[1]
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")

# Cookiecutter-data-science data layout: raw -> interim -> processed.
DATA_DIR = PROJ_ROOT / "data"
RAW_DATA_DIR = DATA_DIR / "raw"
INTERIM_DATA_DIR = DATA_DIR / "interim"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
EXTERNAL_DATA_DIR = DATA_DIR / "external"

# Serialized/trained model artifacts.
MODELS_DIR = PROJ_ROOT / "models"

# Generated analysis outputs.
REPORTS_DIR = PROJ_ROOT / "reports"
FIGURES_DIR = REPORTS_DIR / "figures"

# If tqdm is installed, configure loguru with tqdm.write
# https://github.com/Delgan/loguru/issues/135
try:
    from tqdm import tqdm

    # Handler 0 is loguru's default stderr sink; replace it with tqdm.write
    # so log lines play nicely with active progress bars.
    logger.remove(0)
    logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
except ModuleNotFoundError:
    # tqdm is optional: without it, keep loguru's default handler.
    pass
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = RAW_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    # ----------------------------------------------
):
    """Template CLI step: turn the raw dataset into the processed dataset.

    The body below is placeholder work (a ten-step progress loop) meant to be
    replaced by real processing from *input_path* to *output_path*.
    """
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Processing dataset...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Processing dataset complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
    # -----------------------------------------
):
    """Template CLI step: derive model features from the processed dataset.

    Placeholder implementation — replace the demo loop with real feature
    engineering reading *input_path* and writing *output_path*.
    """
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating features from dataset...")
    for iteration in tqdm(range(10), total=10):
        if iteration == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Features generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
    # -----------------------------------------
):
    """Template CLI step: run inference with a trained model.

    Placeholder implementation — replace the demo loop with code that loads
    *model_path*, scores *features_path*, and writes *predictions_path*.
    """
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Performing inference for model...")
    for tick in tqdm(range(10), total=10):
        if tick == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Inference complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "features.csv",
    labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    # -----------------------------------------
):
    """Template CLI step: fit a model on features/labels and persist it.

    Placeholder implementation — replace the demo loop with real training
    that reads *features_path* / *labels_path* and saves *model_path*.
    """
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Training some model...")
    for epoch in tqdm(range(10), total=10):
        if epoch == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Modeling training complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import FIGURES_DIR, PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = FIGURES_DIR / "plot.png",
    # -----------------------------------------
):
    """Template plotting entry point — swap the demo loop for real figure code."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating plot from data...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Plot generation complete.")
    # -----------------------------------------
26
+
27
+
28
+ if __name__ == "__main__":
29
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "projet_05"
7
+ version = "0.0.1"
8
+ description = "D\u00e9ployez un mod\u00e8le de Machine Learning"
9
+ authors = [
10
+ { name = "St\u00e9phane Manet" },
11
+ ]
12
+ license = { file = "LICENSE" }
13
+ readme = "README.md"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License"
17
+ ]
18
+ dependencies = [
19
+ "loguru",
20
+ "mkdocs",
21
+ "pip",
22
+ "pytest",
23
+ "python-dotenv",
24
+ "ruff",
25
+ "tqdm",
26
+ "typer",
27
+ "imbalanced-learn (>=0.14.0,<0.15.0)",
28
+ "scikit-learn (>=1.4.2,<2.0.0)",
29
+ "matplotlib (>=3.10.7,<4.0.0)",
30
+ "numpy (>=2.3.4,<3.0.0)",
31
+ "pandas (>=2.3.3,<3.0.0)",
32
+ "pyyaml (>=6.0.3,<7.0.0)",
33
+ "scipy (>=1.16.3,<2.0.0)",
34
+ "seaborn (>=0.13.2,<0.14.0)",
35
+ "shap (>=0.49.1,<0.50.0)",
36
+ "gradio (>=5.49.1,<6.0.0)",
37
+ "joblib (>=1.4.2,<2.0.0)"
38
+ ]
39
+
40
+ requires-python = ">=3.11,<3.13"
41
+
42
+
43
+ [tool.ruff]
44
+ line-length = 99
45
+ src = ["projet_05"]
46
+ include = ["pyproject.toml", "projet_05/**/*.py"]
47
+
48
+ [tool.ruff.lint]
49
+ extend-select = ["I"] # Add import sorting
50
+
51
+ [tool.ruff.lint.isort]
52
+ known-first-party = ["projet_05"]
53
+ force-sort-within-sections = true
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import pytest
2
+
3
+
4
def test_code_is_tested():
    """Deliberate placeholder: always fails until real tests replace it (keeps CI honest)."""
    assert False
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py CHANGED
@@ -1 +1,4 @@
1
  from projet_05 import config # noqa: F401
 
 
 
 
1
  from projet_05 import config # noqa: F401
2
+ from projet_05.settings import Settings, load_settings # noqa: F401
3
+
4
+ __all__ = ["config", "Settings", "load_settings"]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+ from typing import Union
6
+
7
+ from scripts_projet04.brand.brand import ( # type: ignore[import-not-found]
8
+ Theme,
9
+ ThemeConfig,
10
+ configure_brand,
11
+ load_brand,
12
+ make_diverging_cmap,
13
+ )
14
+
15
+ ROOT_DIR = Path(__file__).resolve().parents[1]
16
+ DEFAULT_BRAND_PATH = ROOT_DIR / "scripts_projet04" / "brand" / "brand.yml"
17
+
18
+
19
+ def _resolve_path(path: Union[str, Path, None]) -> Path:
20
+ if path is None:
21
+ return DEFAULT_BRAND_PATH
22
+ return Path(path).expanduser().resolve()
23
+
24
+
25
@lru_cache(maxsize=1)
def load_brand_config(path: Union[str, Path, None] = None) -> ThemeConfig:
    """Load the brand YAML once and return the parsed ThemeConfig.

    NOTE(review): maxsize=1 caches only the most recent *path* — alternating
    between two different paths re-reads the YAML on every call.
    """
    cfg_path = _resolve_path(path)
    return load_brand(cfg_path)
30
+
31
+
32
@lru_cache(maxsize=1)
def apply_brand_theme(path: Union[str, Path, None] = None) -> ThemeConfig:
    """
    Apply the OpenClassrooms/TechNova brand theme globally.

    Returns the ThemeConfig so callers can inspect colors if needed.
    """
    # lru_cache short-circuits repeat calls, so the global theme state is only
    # mutated on the first call for a given path.
    cfg_path = _resolve_path(path)
    cfg = configure_brand(cfg_path)
    # Theme.apply() presumably pushes the palette into the plotting backend's
    # global config — TODO confirm against scripts_projet04.brand.brand.
    Theme.apply()
    return cfg
43
+
44
+
45
+ __all__ = [
46
+ "Theme",
47
+ "ThemeConfig",
48
+ "apply_brand_theme",
49
+ "load_brand_config",
50
+ "make_diverging_cmap",
51
+ "DEFAULT_BRAND_PATH",
52
+ ]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py CHANGED
@@ -1,28 +1,202 @@
 
 
 
1
  from pathlib import Path
2
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
- from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
10
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- input_path: Path = RAW_DATA_DIR / "dataset.csv",
16
- output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
17
- # ----------------------------------------------
 
 
 
 
 
 
 
 
18
  ):
19
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
- logger.info("Processing dataset...")
21
- for i in tqdm(range(10), total=10):
22
- if i == 5:
23
- logger.info("Something happened for iteration 5.")
24
- logger.success("Processing dataset complete.")
25
- # -----------------------------------------
26
 
27
 
28
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
+ import pandas as pd
8
  from loguru import logger
 
9
  import typer
10
 
11
+ from projet_05.config import INTERIM_DATA_DIR
12
+ from projet_05.settings import Settings, load_settings
13
+
14
+ app = typer.Typer(help="Préparation et fusion des données sources.")
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Utilitaires
19
+ # ---------------------------------------------------------------------------
20
def safe_read_csv(path: Path, *, dtype=None) -> pd.DataFrame:
    """Read *path* as CSV; on any failure, log the problem and return an empty frame."""
    logger.info("Lecture du fichier {}", path)
    try:
        return pd.read_csv(path, dtype=dtype)
    except FileNotFoundError:
        logger.warning("Fichier absent: {}", path)
    except Exception as exc:  # pragma: no cover - log + empty dataframe
        logger.error("Impossible de lire {} ({})", path, exc)
    return pd.DataFrame()
31
+
32
+
33
def clean_text_values(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize textual values that often materialize missing values.

    Replaces a catalogue of "pseudo-missing" tokens (empty/whitespace strings,
    literal nan/None spellings, don't-know answers) by real NaN, then strips
    surrounding whitespace from every object column.
    """
    # NOTE(review): the whitespace-only tokens below may originally differ in
    # width or contain non-breaking spaces — verify against the raw file.
    replace_tokens = [
        "",
        " ",
        " ",
        " ",
        "nan",
        "NaN",
        "NAN",
        "None",
        "JE ne sais pas",
        "je ne sais pas",
        "Je ne sais pas",
        "Unknow",
        "Unknown",
        "non pertinent",
        "Non pertinent",
        "NON PERTINENT",
    ]
    normalized = df.copy()
    # First pass: whole-frame replacement also catches non-object columns.
    normalized = normalized.replace(replace_tokens, np.nan)

    # Second pass per text column: replace again, cast to the pandas string
    # dtype, then strip surrounding whitespace.
    for column in normalized.select_dtypes(include="object"):
        normalized[column] = (
            normalized[column].replace(replace_tokens, np.nan).astype("string").str.strip()
        )
    return normalized
61
+
62
+
63
+ def _harmonize_id_column(df: pd.DataFrame, column: str, *, digits_only: bool = True) -> pd.DataFrame:
64
+ data = df.copy()
65
+ if column not in data.columns:
66
+ return data
67
+
68
+ if digits_only:
69
+ extracted = data[column].astype(str).str.extract(r"(\\d+)")
70
+ data[column] = pd.to_numeric(extracted[0], errors="coerce")
71
+ data[column] = pd.to_numeric(data[column], errors="coerce").astype("Int64")
72
+ return data
73
+
74
+
75
+ def _rename_column(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame:
76
+ if source not in df.columns:
77
+ return df
78
+ return df.rename(columns={source: target})
79
+
80
+
81
def _log_id_diagnostics(df: pd.DataFrame, *, name: str, col_id: str) -> None:
    """Log row count, unique-identifier count and duplicate count for one source."""
    if col_id not in df.columns:
        logger.warning("La colonne {} est absente du fichier {}.", col_id, name)
        return

    n_rows = len(df)
    n_unique = df[col_id].nunique(dropna=True)
    logger.info(
        "{name}: {total} lignes | {uniques} identifiants uniques | {duplicates} doublons",
        name=name,
        total=n_rows,
        uniques=n_unique,
        duplicates=n_rows - n_unique,
    )
95
+
96
+
97
+ def _persist_sql_trace(df_dict: dict[str, pd.DataFrame], settings: Settings) -> pd.DataFrame:
98
+ """
99
+ Reproduire la fusion SQL décrite dans le notebook.
100
 
101
+ Chaque DataFrame est stocké dans une base SQLite éphémère pour
102
+ conserver une traçabilité de la requête exécutée.
103
+ """
104
+ db_path = settings.db_file
105
+ sql_path = settings.sql_file
106
 
107
+ db_path.parent.mkdir(parents=True, exist_ok=True)
108
+ sql_path.parent.mkdir(parents=True, exist_ok=True)
109
 
110
+ if db_path.exists():
111
+ db_path.unlink()
112
+
113
+ query = f"""
114
+ SELECT *
115
+ FROM sirh
116
+ INNER JOIN evaluation USING ({settings.col_id})
117
+ INNER JOIN sond USING ({settings.col_id});
118
+ """.strip()
119
+
120
+ with db_path.open("wb") as _:
121
+ pass # just ensure the file exists for sqlite on some platforms
122
+
123
+ with sqlite3.connect(db_path) as conn:
124
+ for name, frame in df_dict.items():
125
+ frame.to_sql(name, conn, index=False, if_exists="replace")
126
+ merged = pd.read_sql_query(query, conn)
127
+
128
+ sql_path.write_text(query, encoding="utf-8")
129
+ return merged
130
+
131
+
132
def build_dataset(settings: Settings) -> pd.DataFrame:
    """Load, clean, harmonize and merge the three raw sources."""

    def _prepare(path, rename_from=None):
        # Shared pipeline: load -> optional id rename -> id harmonization -> text cleanup.
        frame = safe_read_csv(path)
        if rename_from is not None:
            frame = _rename_column(frame, rename_from, settings.col_id)
        frame = _harmonize_id_column(frame, settings.col_id, digits_only=True)
        return clean_text_values(frame)

    frames = {
        "sirh": _prepare(settings.path_sirh),
        "evaluation": _prepare(settings.path_eval, rename_from="eval_number"),
        "sond": _prepare(settings.path_sondage, rename_from="code_sondage"),
    }

    for source_name, frame in frames.items():
        _log_id_diagnostics(frame, name=source_name, col_id=settings.col_id)

    merged = _persist_sql_trace(frames, settings)

    # The join key must survive the merge; anything else means broken inputs.
    if settings.col_id not in merged.columns:
        raise KeyError(
            f"La colonne {settings.col_id} est absente de la fusion finale. "
            "Vérifiez vos fichiers sources."
        )

    logger.success("Fusion réalisée: {} lignes / {} colonnes", *merged.shape)
    return merged
169
+
170
+
171
def save_dataset(df: pd.DataFrame, output_path: Path) -> None:
    """Write the merged frame to *output_path* as CSV, creating parent dirs."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Fichier fusionné sauvegardé dans {}", output_path)
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # CLI
179
+ # ---------------------------------------------------------------------------
180
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin vers un fichier settings.yml personnalisé.",
    ),
    output_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--output",
        "-o",
        help="Chemin de sortie du dataset fusionné.",
    ),
):
    """Entrypoint Typer pour reproduire la fusion des données brutes."""
    # Fall back to the default settings file when --settings is not given.
    if settings_path:
        cfg = load_settings(settings_path)
    else:
        cfg = load_settings()
    save_dataset(build_dataset(cfg), output_path)
 
 
200
 
201
 
202
  if __name__ == "__main__":
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/explainability.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Tuple
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from loguru import logger
9
+
10
+ from projet_05.branding import Theme, apply_brand_theme, make_diverging_cmap
11
+ from scripts_projet04.manet_projet04.shap_generator import ( # type: ignore[import-not-found]
12
+ shap_global,
13
+ shap_local,
14
+ )
15
+
16
+ apply_brand_theme()
17
+
18
+
19
+ def _shape_array(values) -> np.ndarray:
20
+ if hasattr(values, "values"):
21
+ arr = np.array(values.values)
22
+ else:
23
+ arr = np.array(values)
24
+ return np.nan_to_num(arr, copy=False)
25
+
26
+
27
def compute_shap_summary(
    pipeline,
    X: pd.DataFrame,
    y: pd.Series,
    *,
    max_samples: int = 500,
) -> Tuple[pd.DataFrame | None, object | None]:
    """
    Reuse the historical `shap_global` helper to build the plots and a tabular summary.

    Returns
    -------
    summary_df : pd.DataFrame | None
        Moyenne absolue des valeurs SHAP (ordre décroissant).
    shap_values : shap.Explanation | None
        Objet renvoyé par shap_global pour des analyses locales ultérieures.
    """
    # Brand-colored diverging colormap forwarded to the shap_global plots.
    cmap = make_diverging_cmap(Theme.PRIMARY, Theme.SECONDARY)
    shap_values, _, feature_names = shap_global(
        pipeline,
        X,
        y,
        sample_size=max_samples,
        cmap=cmap,
    )
    # shap_global signals failure by returning None values; propagate as (None, None).
    if shap_values is None or feature_names is None:
        logger.warning("Impossible de générer les résumés SHAP.")
        return None, None

    shap_array = _shape_array(shap_values)
    # A 1-D result means a single feature; reshape so the per-feature mean works.
    if shap_array.ndim == 1:
        shap_array = shap_array.reshape(-1, 1)
    # Mean absolute SHAP value per feature = global importance ranking.
    mean_abs = np.abs(shap_array).mean(axis=0)
    summary = (
        pd.DataFrame({"feature": list(feature_names), "mean_abs_shap": mean_abs})
        .sort_values("mean_abs_shap", ascending=False)
        .reset_index(drop=True)
    )
    return summary, shap_values
66
+
67
+
68
def save_shap_summary(summary: pd.DataFrame, output_path: Path) -> None:
    """Persist the global SHAP importance table as CSV, creating parent dirs."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(output_path, index=False)
    logger.info("Résumé SHAP sauvegardé dans {}", output_path)
72
+
73
+
74
def export_local_explanations(
    pipeline,
    shap_values,
    X: pd.DataFrame,
    custom_index: int | None = None,
) -> None:
    """
    Génère trois cas d'usage par défaut (impact max, risque max, risque min)
    et un indice custom optionnel pour la trace historique.
    """
    # Nothing to explain when the global SHAP computation failed upstream.
    if shap_values is None:
        return

    shap_array = _shape_array(shap_values)
    # Row with the largest total absolute SHAP contribution (most "explained" case).
    idx_impact = int(np.argmax(np.sum(np.abs(shap_array), axis=1)))
    shap_local(idx_impact, shap_values)

    # Highest predicted probability of the positive class (highest risk).
    y_proba_all = pipeline.predict_proba(X)[:, 1]
    idx_highrisk = int(np.argmax(y_proba_all))
    shap_local(idx_highrisk, shap_values)

    # Lowest-risk row; smaller text_scale — presumably for plot readability (TODO confirm).
    idx_lowrisk = int(np.argmin(y_proba_all))
    shap_local(idx_lowrisk, shap_values, text_scale=0.6)

    # Optional additional case requested by the caller.
    if custom_index is not None:
        shap_local(custom_index, shap_values, max_display=8)
100
+
101
+
102
+ __all__ = ["compute_shap_summary", "save_shap_summary", "export_local_explanations"]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py CHANGED
@@ -1,28 +1,170 @@
 
 
 
 
1
  from pathlib import Path
2
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
- from projet_05.config import PROCESSED_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
16
- output_path: Path = PROCESSED_DATA_DIR / "features.csv",
17
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ):
19
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
- logger.info("Generating features from dataset...")
21
- for i in tqdm(range(10), total=10):
22
- if i == 5:
23
- logger.info("Something happened for iteration 5.")
24
- logger.success("Features generation complete.")
25
- # -----------------------------------------
26
 
27
 
28
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime
5
  from pathlib import Path
6
 
7
+ import numpy as np
8
+ import pandas as pd
9
  from loguru import logger
 
10
  import typer
11
 
12
+ from projet_05.config import INTERIM_DATA_DIR, PROCESSED_DATA_DIR
13
+ from projet_05.settings import Settings, load_settings
14
+
15
+ app = typer.Typer(help="Génération des features et nettoyage de la cible.")
16
+
17
+ TARGET_MAPPING = {
18
+ "1": 1,
19
+ "0": 0,
20
+ "oui": 1,
21
+ "non": 0,
22
+ "true": 1,
23
+ "false": 0,
24
+ "quitte": 1,
25
+ "reste": 0,
26
+ "yes": 1,
27
+ "no": 0,
28
+ }
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Utilitaires cœur de pipeline
33
+ # ---------------------------------------------------------------------------
34
def _load_merged_dataset(path: Path) -> pd.DataFrame:
    """Read the merged CSV produced by dataset.py, raising when it is missing."""
    if path.exists():
        logger.info("Chargement du dataset fusionné depuis {}", path)
        return pd.read_csv(path)
    raise FileNotFoundError(
        f"Le fichier fusionné {path} est introuvable. Lancez `python projet_05/dataset.py` d'abord."
    )
41
+
42
+
43
def _normalize_target(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
    """Map the raw target column to {0, 1} and drop rows with an invalid label."""
    if settings.target not in df.columns:
        raise KeyError(f"La variable cible '{settings.target}' est absente du fichier.")

    cleaned = df.copy()
    # Canonicalize the textual labels before looking them up in the mapping.
    as_text = cleaned[settings.target].astype(str).str.strip().str.lower()
    cleaned[settings.target] = as_text.map(TARGET_MAPPING)

    n_before = len(cleaned)
    cleaned = cleaned[cleaned[settings.target].isin([0, 1])].copy()
    n_dropped = n_before - len(cleaned)
    if n_dropped:
        logger.warning("Suppression de {} lignes avec une cible invalide.", n_dropped)

    cleaned[settings.target] = cleaned[settings.target].astype(int)
    return cleaned
63
+
64
+
65
+ def _safe_ratio(df: pd.DataFrame, numerator: str, denominator: str, output: str) -> None:
66
+ if numerator not in df.columns or denominator not in df.columns:
67
+ return
68
+ denominator_series = df[denominator].replace({0: np.nan})
69
+ df[output] = df[numerator] / denominator_series
70
+
71
+
72
def _engineer_features(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
    """Derive ratio / aggregate features from the cleaned HR dataset.

    Adds, when the source columns exist: a normalized previous-raise rate,
    four experience-normalized ratios, a mean satisfaction score, and the
    evaluation-score delta. Missing columns are silently skipped.
    """
    engineered = df.copy()

    # NOTE(review): "augementation" looks like a misspelling kept to match the
    # raw data's column name — confirm against the source CSV headers.
    col = "augementation_salaire_precedente"
    if col in engineered:
        # Strip "%" and French decimal commas, then convert "12,5" -> 0.125.
        engineered[col] = (
            engineered[col]
            .astype(str)
            .str.replace("%", "", regex=False)
            .str.replace(",", ".", regex=False)
            .str.strip()
        )
        engineered[col] = pd.to_numeric(engineered[col], errors="coerce") / 100

    # Ratios via _safe_ratio: no-op when a column is missing, NaN on division by zero.
    _safe_ratio(engineered, "augementation_salaire_precedente", "revenu_mensuel", "augmentation_par_revenu")
    _safe_ratio(engineered, "annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
    _safe_ratio(engineered, "nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
    _safe_ratio(
        engineered, "annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience"
    )

    # Mean of whichever configured satisfaction columns are actually present.
    if settings.sat_cols:
        existing = [col for col in settings.sat_cols if col in engineered.columns]
        if existing:
            engineered["score_moyen_satisfaction"] = engineered[existing].mean(axis=1)

    # Delta between the two evaluation campaigns (current minus previous).
    if "note_evaluation_actuelle" in engineered.columns and "note_evaluation_precedente" in engineered.columns:
        engineered["evolution_note"] = (
            engineered["note_evaluation_actuelle"] - engineered["note_evaluation_precedente"]
        )

    return engineered
104
+
105
+
106
def build_features(settings: Settings, *, input_path: Path) -> pd.DataFrame:
    """Load the merged dataset, clean the target, and add engineered features."""
    merged = _load_merged_dataset(input_path)
    with_target = _normalize_target(merged, settings)
    return _engineer_features(with_target, settings)
111
+
112
+
113
def save_features(df: pd.DataFrame, output_path: Path) -> None:
    """Write the enriched dataset to *output_path* as CSV, creating parent dirs."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Dataset enrichi sauvegardé dans {}", output_path)
117
+
118
+
119
def save_schema(settings: Settings, output_path: Path) -> None:
    """Persist the feature schema (target, id, feature lists) as pretty JSON.

    The timestamp documents when the schema was generated so downstream
    consumers can detect stale schemas.
    """
    from datetime import timezone  # local import: keeps the module import block untouched

    schema = {
        "target": settings.target,
        "col_id": settings.col_id,
        "numerical_features": list(settings.num_cols),
        "categorical_features": list(settings.cat_cols),
        "satisfaction_features": list(settings.sat_cols),
        # FIX: datetime.utcnow() is deprecated since Python 3.12 (this project
        # targets >=3.11,<3.13); emit an explicitly UTC-aware timestamp instead.
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(schema, indent=2), encoding="utf-8")
    logger.info("Schéma sauvegardé dans {}", output_path)
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # CLI
135
+ # ---------------------------------------------------------------------------
136
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin optionnel vers un fichier settings.yml personnalisé.",
    ),
    input_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--input",
        "-i",
        help="Chemin du fichier issu de la fusion.",
    ),
    output_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "dataset.csv",
        "--output",
        "-o",
        help="Chemin du fichier enrichi.",
    ),
    schema_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "schema.json",
        "--schema",
        help="Chemin de sauvegarde du schéma de features.",
    ),
):
    """Pipeline Typer pour préparer le dataset enrichi.

    Chain: load the merged CSV, normalize the target and engineer features,
    then save both the enriched dataset and its JSON feature schema.
    """
    # Fall back to the default settings file when no --settings is given.
    settings = load_settings(settings_path) if settings_path else load_settings()
    df = build_features(settings, input_path=input_path)
    save_features(df, output_path)
    save_schema(settings, schema_path)
 
168
 
169
 
170
  if __name__ == "__main__":