GitHub Actions
commited on
Commit
·
53af998
1
Parent(s):
9a917c1
🚀 Auto-deploy from GitHub Actions
Browse filesThis view is limited to 50 files because it contains too many changes. Â
See raw diff
- .github/workflows/deploy.yml +2 -2
- hf_space/hf_space/hf_space/README.md +34 -18
- hf_space/hf_space/hf_space/hf_space/LICENSE +10 -0
- hf_space/hf_space/hf_space/hf_space/Makefile +85 -0
- hf_space/hf_space/hf_space/hf_space/app.py +178 -4
- hf_space/hf_space/hf_space/hf_space/docs/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/docs/README.md +12 -0
- hf_space/hf_space/hf_space/hf_space/docs/docs/getting-started.md +6 -0
- hf_space/hf_space/hf_space/hf_space/docs/docs/index.md +10 -0
- hf_space/hf_space/hf_space/hf_space/docs/mkdocs.yml +4 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -4
- hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +191 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/README.md +328 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +0 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +7 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +37 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +2 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +7 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py +17 -0
- hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
- hf_space/hf_space/hf_space/hf_space/poetry.lock +0 -0
- hf_space/hf_space/hf_space/hf_space/poetry.toml +2 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/config.py +32 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +29 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/features.py +29 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py +0 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +30 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +30 -0
- hf_space/hf_space/hf_space/hf_space/projet_05/plots.py +29 -0
- hf_space/hf_space/hf_space/hf_space/pyproject.toml +53 -0
- hf_space/hf_space/hf_space/hf_space/references/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/reports/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep +0 -0
- hf_space/hf_space/hf_space/hf_space/tests/test_data.py +5 -0
- hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
- hf_space/hf_space/hf_space/projet_05/__init__.py +3 -0
- hf_space/hf_space/hf_space/projet_05/branding.py +52 -0
- hf_space/hf_space/hf_space/projet_05/dataset.py +188 -14
- hf_space/hf_space/hf_space/projet_05/explainability.py +102 -0
- hf_space/hf_space/hf_space/projet_05/features.py +156 -14
- hf_space/hf_space/hf_space/projet_05/modeling/predict.py +84 -14
- hf_space/hf_space/hf_space/projet_05/modeling/train.py +328 -15
- hf_space/hf_space/hf_space/projet_05/settings.py +114 -0
- hf_space/hf_space/hf_space/projet_05/settings.yml +56 -0
- hf_space/hf_space/hf_space/scripts_projet04/brand/__init__.py +0 -0
- hf_space/hf_space/hf_space/scripts_projet04/brand/brand.py +713 -0
.github/workflows/deploy.yml
CHANGED
|
@@ -33,8 +33,8 @@ jobs:
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
-
rsync -av --exclude '.git' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
-
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
|
|
|
| 33 |
git config --global user.email "actions@github.com"
|
| 34 |
git config --global user.name "GitHub Actions"
|
| 35 |
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 36 |
+
rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
| 1 |
# projet_05
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 4 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 5 |
</a>
|
|
@@ -57,6 +69,11 @@ Déployez un modèle de Machine Learning
|
|
| 57 |
└── plots.py <- Code to create visualizations
|
| 58 |
```
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
--------
|
| 61 |
|
| 62 |
---
|
|
@@ -93,6 +110,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 93 |
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 94 |
-->
|
| 95 |
[![Contributors][contributors-shield]][contributors-url]
|
|
|
|
| 96 |
[![Forks][forks-shield]][forks-url]
|
| 97 |
[![Stargazers][stars-shield]][stars-url]
|
| 98 |
[![Issues][issues-shield]][issues-url]
|
|
@@ -236,7 +254,7 @@ _For more examples, please refer to the [Documentation](https://example.com)_
|
|
| 236 |
- [ ] Feature 3
|
| 237 |
- [ ] Nested Feature
|
| 238 |
|
| 239 |
-
See the [open issues](https://github.com/
|
| 240 |
|
| 241 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 242 |
|
|
@@ -299,18 +317,18 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 299 |
|
| 300 |
<!-- MARKDOWN LINKS & IMAGES -->
|
| 301 |
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 302 |
-
[contributors-shield]: https://img.shields.io/github/contributors/
|
| 303 |
-
[contributors-url]: https://github.com/
|
| 304 |
-
[forks-shield]: https://img.shields.io/github/forks/
|
| 305 |
-
[forks-url]: https://github.com/
|
| 306 |
-
[stars-shield]: https://img.shields.io/github/stars/
|
| 307 |
-
[stars-url]: https://github.com/
|
| 308 |
-
[issues-shield]: https://img.shields.io/github/issues/
|
| 309 |
-
[issues-url]: https://github.com/
|
| 310 |
-
[license-shield]: https://img.shields.io/github/license/
|
| 311 |
-
[license-url]: https://github.com/
|
| 312 |
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 313 |
-
[linkedin-url]: https://linkedin.com/in/
|
| 314 |
[product-screenshot]: images/screenshot.png
|
| 315 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 316 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
@@ -331,10 +349,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
|
|
| 331 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 332 |
[JQuery-url]: https://jquery.com
|
| 333 |
<!-- TODO: -->
|
| 334 |
-
[
|
| 335 |
-
[
|
| 336 |
-
[
|
| 337 |
-
[
|
| 338 |
-
[](#)
|
| 339 |
[](#)
|
| 340 |
-
[](#)[text](../projet_04/.gitignore)
|
|
|
|
| 1 |
# projet_05
|
| 2 |
|
| 3 |
+
---
|
| 4 |
+
title: OCR_Projet05
|
| 5 |
+
emoji: 🔥
|
| 6 |
+
colorFrom: purple
|
| 7 |
+
colorTo: purple
|
| 8 |
+
sdk: gradio
|
| 9 |
+
sdk_version: 5.49.1
|
| 10 |
+
app_file: app.py
|
| 11 |
+
pinned: true
|
| 12 |
+
short_description: Projet 05 formation Openclassrooms
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 16 |
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 17 |
</a>
|
|
|
|
| 69 |
└── plots.py <- Code to create visualizations
|
| 70 |
```
|
| 71 |
|
| 72 |
+
## Code hérité réutilisé
|
| 73 |
+
|
| 74 |
+
- `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
|
| 75 |
+
- `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
|
| 76 |
+
|
| 77 |
--------
|
| 78 |
|
| 79 |
---
|
|
|
|
| 110 |
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 111 |
-->
|
| 112 |
[![Contributors][contributors-shield]][contributors-url]
|
| 113 |
+
[![Python][python]][python]
|
| 114 |
[![Forks][forks-shield]][forks-url]
|
| 115 |
[![Stargazers][stars-shield]][stars-url]
|
| 116 |
[![Issues][issues-shield]][issues-url]
|
|
|
|
| 254 |
- [ ] Feature 3
|
| 255 |
- [ ] Nested Feature
|
| 256 |
|
| 257 |
+
See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
|
| 258 |
|
| 259 |
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 260 |
|
|
|
|
| 317 |
|
| 318 |
<!-- MARKDOWN LINKS & IMAGES -->
|
| 319 |
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 320 |
+
[contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 321 |
+
[contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
|
| 322 |
+
[forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 323 |
+
[forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
|
| 324 |
+
[stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 325 |
+
[stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
|
| 326 |
+
[issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 327 |
+
[issues-url]: https://github.com/stephmnt/OCR_projet05/issues
|
| 328 |
+
[license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
|
| 329 |
+
[license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
|
| 330 |
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 331 |
+
[linkedin-url]: https://linkedin.com/in/stephanemanet
|
| 332 |
[product-screenshot]: images/screenshot.png
|
| 333 |
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 334 |
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
|
|
|
| 349 |
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 350 |
[JQuery-url]: https://jquery.com
|
| 351 |
<!-- TODO: -->
|
| 352 |
+
[Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
|
| 353 |
+
[Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
|
| 354 |
+
[MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
|
| 355 |
+
[NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
|
|
|
|
| 356 |
[](#)
|
|
|
hf_space/hf_space/hf_space/hf_space/LICENSE
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
The MIT License (MIT)
|
| 3 |
+
Copyright (c) 2025, Stéphane Manet
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
| 6 |
+
|
| 7 |
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
| 8 |
+
|
| 9 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 10 |
+
|
hf_space/hf_space/hf_space/hf_space/Makefile
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#################################################################################
|
| 2 |
+
# GLOBALS #
|
| 3 |
+
#################################################################################
|
| 4 |
+
|
| 5 |
+
PROJECT_NAME = OCR_projet05
|
| 6 |
+
PYTHON_VERSION = 3.10
|
| 7 |
+
PYTHON_INTERPRETER = python
|
| 8 |
+
|
| 9 |
+
#################################################################################
|
| 10 |
+
# COMMANDS #
|
| 11 |
+
#################################################################################
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
## Install Python dependencies
|
| 15 |
+
.PHONY: requirements
|
| 16 |
+
requirements:
|
| 17 |
+
pip install -e .
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
## Delete all compiled Python files
|
| 23 |
+
.PHONY: clean
|
| 24 |
+
clean:
|
| 25 |
+
find . -type f -name "*.py[co]" -delete
|
| 26 |
+
find . -type d -name "__pycache__" -delete
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
## Lint using ruff (use `make format` to do formatting)
|
| 30 |
+
.PHONY: lint
|
| 31 |
+
lint:
|
| 32 |
+
ruff format --check
|
| 33 |
+
ruff check
|
| 34 |
+
|
| 35 |
+
## Format source code with ruff
|
| 36 |
+
.PHONY: format
|
| 37 |
+
format:
|
| 38 |
+
ruff check --fix
|
| 39 |
+
ruff format
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
## Run tests
|
| 44 |
+
.PHONY: test
|
| 45 |
+
test:
|
| 46 |
+
python -m pytest tests
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
## Set up Python interpreter environment
|
| 50 |
+
.PHONY: create_environment
|
| 51 |
+
create_environment:
|
| 52 |
+
@bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
|
| 53 |
+
@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
#################################################################################
|
| 59 |
+
# PROJECT RULES #
|
| 60 |
+
#################################################################################
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
## Make dataset
|
| 64 |
+
.PHONY: data
|
| 65 |
+
data: requirements
|
| 66 |
+
$(PYTHON_INTERPRETER) projet_05/dataset.py
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
#################################################################################
|
| 70 |
+
# Self Documenting Commands #
|
| 71 |
+
#################################################################################
|
| 72 |
+
|
| 73 |
+
.DEFAULT_GOAL := help
|
| 74 |
+
|
| 75 |
+
define PRINT_HELP_PYSCRIPT
|
| 76 |
+
import re, sys; \
|
| 77 |
+
lines = '\n'.join([line for line in sys.stdin]); \
|
| 78 |
+
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
|
| 79 |
+
print('Available rules:\n'); \
|
| 80 |
+
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
|
| 81 |
+
endef
|
| 82 |
+
export PRINT_HELP_PYSCRIPT
|
| 83 |
+
|
| 84 |
+
help:
|
| 85 |
+
@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
|
hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -1,7 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
def greet(name):
|
| 4 |
-
return "Hello " + name + "!!"
|
| 5 |
|
| 6 |
-
|
| 7 |
-
demo.launch()
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
import gradio as gr
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from loguru import logger
|
| 10 |
+
|
| 11 |
+
from projet_05.branding import apply_brand_theme
|
| 12 |
+
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
|
| 13 |
+
|
| 14 |
+
MODEL_PATH = Path("models/best_model.joblib")
|
| 15 |
+
METADATA_PATH = Path("models/best_model_meta.json")
|
| 16 |
+
SCHEMA_PATH = Path("data/processed/schema.json")
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _load_schema(path: Path) -> dict[str, Any]:
|
| 20 |
+
if not path.exists():
|
| 21 |
+
return {}
|
| 22 |
+
return json.loads(path.read_text(encoding="utf-8"))
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
|
| 26 |
+
if schema:
|
| 27 |
+
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
|
| 28 |
+
if candidates:
|
| 29 |
+
return candidates
|
| 30 |
+
features = metadata.get("features", {})
|
| 31 |
+
explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
|
| 32 |
+
if explicit:
|
| 33 |
+
return explicit
|
| 34 |
+
if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
|
| 35 |
+
return list(pipeline.feature_names_in_)
|
| 36 |
+
return []
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
|
| 40 |
+
if isinstance(payload, pd.DataFrame):
|
| 41 |
+
df = payload.copy()
|
| 42 |
+
elif payload is None:
|
| 43 |
+
df = pd.DataFrame(columns=headers)
|
| 44 |
+
else:
|
| 45 |
+
df = pd.DataFrame(payload, columns=headers if headers else None)
|
| 46 |
+
df = df.dropna(how="all")
|
| 47 |
+
if df.empty:
|
| 48 |
+
raise gr.Error("Merci de saisir au moins une ligne complète.")
|
| 49 |
+
return df
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _ensure_model():
|
| 53 |
+
if PIPELINE is None:
|
| 54 |
+
raise gr.Error(
|
| 55 |
+
"Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
|
| 56 |
+
)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def score_table(table):
|
| 60 |
+
_ensure_model()
|
| 61 |
+
df = _convert_input(table, FEATURE_ORDER)
|
| 62 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 63 |
+
return run_inference(
|
| 64 |
+
df,
|
| 65 |
+
PIPELINE,
|
| 66 |
+
THRESHOLD,
|
| 67 |
+
drop_columns=drop_cols,
|
| 68 |
+
required_features=FEATURE_ORDER or None,
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def score_csv(upload):
|
| 73 |
+
_ensure_model()
|
| 74 |
+
if upload is None:
|
| 75 |
+
raise gr.Error("Veuillez déposer un fichier CSV.")
|
| 76 |
+
df = pd.read_csv(upload.name)
|
| 77 |
+
drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
|
| 78 |
+
return run_inference(
|
| 79 |
+
df,
|
| 80 |
+
PIPELINE,
|
| 81 |
+
THRESHOLD,
|
| 82 |
+
drop_columns=drop_cols,
|
| 83 |
+
required_features=FEATURE_ORDER or None,
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def predict_from_form(*values):
|
| 88 |
+
_ensure_model()
|
| 89 |
+
if not FEATURE_ORDER:
|
| 90 |
+
raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
|
| 91 |
+
payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
|
| 92 |
+
df = pd.DataFrame([payload])
|
| 93 |
+
scored = run_inference(
|
| 94 |
+
df,
|
| 95 |
+
PIPELINE,
|
| 96 |
+
THRESHOLD,
|
| 97 |
+
required_features=FEATURE_ORDER or None,
|
| 98 |
+
)
|
| 99 |
+
row = scored.iloc[0]
|
| 100 |
+
label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
|
| 101 |
+
return {
|
| 102 |
+
"probability": round(float(row["proba_depart"]), 4),
|
| 103 |
+
"decision": label,
|
| 104 |
+
"threshold": THRESHOLD,
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Chargement des artéfacts
|
| 109 |
+
apply_brand_theme()
|
| 110 |
+
|
| 111 |
+
PIPELINE = None
|
| 112 |
+
METADATA: dict[str, Any] = {}
|
| 113 |
+
THRESHOLD = 0.5
|
| 114 |
+
TARGET_COLUMN: str | None = None
|
| 115 |
+
SCHEMA = _load_schema(SCHEMA_PATH)
|
| 116 |
+
|
| 117 |
+
try:
|
| 118 |
+
PIPELINE = load_pipeline(MODEL_PATH)
|
| 119 |
+
METADATA = load_metadata(METADATA_PATH)
|
| 120 |
+
THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
|
| 121 |
+
TARGET_COLUMN = METADATA.get("target")
|
| 122 |
+
except FileNotFoundError as exc:
|
| 123 |
+
logger.warning("Artéfact manquant: {}", exc)
|
| 124 |
+
|
| 125 |
+
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
|
| 126 |
+
|
| 127 |
+
with gr.Blocks(title="Prédicteur d'attrition") as demo:
|
| 128 |
+
gr.Markdown("# API Gradio – Prédiction de départ employé")
|
| 129 |
+
gr.Markdown(
|
| 130 |
+
"Le modèle applique le pipeline entraîné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
if PIPELINE is None:
|
| 134 |
+
gr.Markdown(
|
| 135 |
+
"⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
|
| 136 |
+
)
|
| 137 |
+
else:
|
| 138 |
+
gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
|
| 139 |
+
|
| 140 |
+
with gr.Tab("Formulaire unitaire"):
|
| 141 |
+
if not FEATURE_ORDER:
|
| 142 |
+
gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
|
| 143 |
+
else:
|
| 144 |
+
form_inputs: list[gr.components.Component] = [] # type: ignore
|
| 145 |
+
for feature in FEATURE_ORDER:
|
| 146 |
+
form_inputs.append(
|
| 147 |
+
gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
|
| 148 |
+
)
|
| 149 |
+
form_output = gr.JSON(label="Résultat")
|
| 150 |
+
gr.Button("Prédire").click(
|
| 151 |
+
fn=predict_from_form,
|
| 152 |
+
inputs=form_inputs,
|
| 153 |
+
outputs=form_output,
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
with gr.Tab("Tableau interactif"):
|
| 157 |
+
table_input = gr.Dataframe(
|
| 158 |
+
headers=FEATURE_ORDER if FEATURE_ORDER else None,
|
| 159 |
+
row_count=(1, "dynamic"),
|
| 160 |
+
col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
|
| 161 |
+
type="pandas",
|
| 162 |
+
)
|
| 163 |
+
table_output = gr.Dataframe(label="Prédictions", type="pandas")
|
| 164 |
+
gr.Button("Scorer les lignes").click(
|
| 165 |
+
fn=score_table,
|
| 166 |
+
inputs=table_input,
|
| 167 |
+
outputs=table_output,
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
with gr.Tab("Fichier CSV"):
|
| 171 |
+
file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
|
| 172 |
+
file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
|
| 173 |
+
gr.Button("Scorer le fichier").click(
|
| 174 |
+
fn=score_csv,
|
| 175 |
+
inputs=file_input,
|
| 176 |
+
outputs=file_output,
|
| 177 |
+
)
|
| 178 |
|
|
|
|
|
|
|
| 179 |
|
| 180 |
+
if __name__ == "__main__":
|
| 181 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/docs/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/docs/README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Generating the docs
|
| 2 |
+
----------
|
| 3 |
+
|
| 4 |
+
Use [mkdocs](http://www.mkdocs.org/) structure to update the documentation.
|
| 5 |
+
|
| 6 |
+
Build locally with:
|
| 7 |
+
|
| 8 |
+
mkdocs build
|
| 9 |
+
|
| 10 |
+
Serve locally with:
|
| 11 |
+
|
| 12 |
+
mkdocs serve
|
hf_space/hf_space/hf_space/hf_space/docs/docs/getting-started.md
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Getting started
|
| 2 |
+
===============
|
| 3 |
+
|
| 4 |
+
This is where you describe how to get set up on a clean install, including the
|
| 5 |
+
commands necessary to get the raw data (using the `sync_data_from_s3` command,
|
| 6 |
+
for example), and then how to make the cleaned, final data sets.
|
hf_space/hf_space/hf_space/hf_space/docs/docs/index.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# projet_05 documentation!
|
| 2 |
+
|
| 3 |
+
## Description
|
| 4 |
+
|
| 5 |
+
Déployez un modèle de Machine Learning
|
| 6 |
+
|
| 7 |
+
## Commands
|
| 8 |
+
|
| 9 |
+
The Makefile contains the central entry points for common tasks related to this project.
|
| 10 |
+
|
hf_space/hf_space/hf_space/hf_space/docs/mkdocs.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
site_name: projet_05
|
| 2 |
+
#
|
| 3 |
+
site_author: Stéphane Manet
|
| 4 |
+
#
|
hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
CHANGED
|
@@ -1,10 +1,13 @@
|
|
| 1 |
-
name:
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
- main
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
jobs:
|
| 9 |
deploy:
|
| 10 |
runs-on: ubuntu-latest
|
|
@@ -23,7 +26,7 @@ jobs:
|
|
| 23 |
python -m pip install --upgrade pip
|
| 24 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 25 |
|
| 26 |
-
- name:
|
| 27 |
env:
|
| 28 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 29 |
run: |
|
|
@@ -33,5 +36,5 @@ jobs:
|
|
| 33 |
rsync -av --exclude '.git' ./ hf_space/
|
| 34 |
cd hf_space
|
| 35 |
git add .
|
| 36 |
-
git commit -m "🚀 Auto-deploy from GitHub Actions"
|
| 37 |
-
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
|
|
|
| 1 |
+
name: Deploy to Hugging Face Spaces
|
| 2 |
|
| 3 |
on:
|
| 4 |
push:
|
| 5 |
branches:
|
| 6 |
- main
|
| 7 |
|
| 8 |
+
permissions:
|
| 9 |
+
contents: write
|
| 10 |
+
|
| 11 |
jobs:
|
| 12 |
deploy:
|
| 13 |
runs-on: ubuntu-latest
|
|
|
|
| 26 |
python -m pip install --upgrade pip
|
| 27 |
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 28 |
|
| 29 |
+
- name: Deploy to Hugging Face Space
|
| 30 |
env:
|
| 31 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 32 |
run: |
|
|
|
|
| 36 |
rsync -av --exclude '.git' ./ hf_space/
|
| 37 |
cd hf_space
|
| 38 |
git add .
|
| 39 |
+
git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
|
| 40 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
CHANGED
|
@@ -1,2 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
*.code-workspace
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data
|
| 2 |
+
/data/
|
| 3 |
+
|
| 4 |
+
# Mac OS-specific storage files
|
| 5 |
+
.DS_Store
|
| 6 |
*.code-workspace
|
| 7 |
+
|
| 8 |
+
# vim
|
| 9 |
+
*.swp
|
| 10 |
+
*.swo
|
| 11 |
+
|
| 12 |
+
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 13 |
+
|
| 14 |
+
# Byte-compiled / optimized / DLL files
|
| 15 |
+
__pycache__/
|
| 16 |
+
*.py[cod]
|
| 17 |
+
*$py.class
|
| 18 |
+
|
| 19 |
+
# C extensions
|
| 20 |
+
*.so
|
| 21 |
+
|
| 22 |
+
# Distribution / packaging
|
| 23 |
+
.Python
|
| 24 |
+
build/
|
| 25 |
+
develop-eggs/
|
| 26 |
+
dist/
|
| 27 |
+
downloads/
|
| 28 |
+
eggs/
|
| 29 |
+
.eggs/
|
| 30 |
+
lib/
|
| 31 |
+
lib64/
|
| 32 |
+
parts/
|
| 33 |
+
sdist/
|
| 34 |
+
var/
|
| 35 |
+
wheels/
|
| 36 |
+
share/python-wheels/
|
| 37 |
+
*.egg-info/
|
| 38 |
+
.installed.cfg
|
| 39 |
+
*.egg
|
| 40 |
+
MANIFEST
|
| 41 |
+
|
| 42 |
+
# PyInstaller
|
| 43 |
+
# Usually these files are written by a python script from a template
|
| 44 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 45 |
+
*.manifest
|
| 46 |
+
*.spec
|
| 47 |
+
|
| 48 |
+
# Installer logs
|
| 49 |
+
pip-log.txt
|
| 50 |
+
pip-delete-this-directory.txt
|
| 51 |
+
|
| 52 |
+
# Unit test / coverage reports
|
| 53 |
+
htmlcov/
|
| 54 |
+
.tox/
|
| 55 |
+
.nox/
|
| 56 |
+
.coverage
|
| 57 |
+
.coverage.*
|
| 58 |
+
.cache
|
| 59 |
+
nosetests.xml
|
| 60 |
+
coverage.xml
|
| 61 |
+
*.cover
|
| 62 |
+
*.py,cover
|
| 63 |
+
.hypothesis/
|
| 64 |
+
.pytest_cache/
|
| 65 |
+
cover/
|
| 66 |
+
|
| 67 |
+
# Translations
|
| 68 |
+
*.mo
|
| 69 |
+
*.pot
|
| 70 |
+
|
| 71 |
+
# Django stuff:
|
| 72 |
+
*.log
|
| 73 |
+
local_settings.py
|
| 74 |
+
db.sqlite3
|
| 75 |
+
db.sqlite3-journal
|
| 76 |
+
|
| 77 |
+
# Flask stuff:
|
| 78 |
+
instance/
|
| 79 |
+
.webassets-cache
|
| 80 |
+
|
| 81 |
+
# Scrapy stuff:
|
| 82 |
+
.scrapy
|
| 83 |
+
|
| 84 |
+
# MkDocs documentation
|
| 85 |
+
docs/site/
|
| 86 |
+
|
| 87 |
+
# PyBuilder
|
| 88 |
+
.pybuilder/
|
| 89 |
+
target/
|
| 90 |
+
|
| 91 |
+
# Jupyter Notebook
|
| 92 |
+
.ipynb_checkpoints
|
| 93 |
+
|
| 94 |
+
# IPython
|
| 95 |
+
profile_default/
|
| 96 |
+
ipython_config.py
|
| 97 |
+
|
| 98 |
+
# pyenv
|
| 99 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 100 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 101 |
+
# .python-version
|
| 102 |
+
|
| 103 |
+
# pipenv
|
| 104 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 105 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 106 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 107 |
+
# install all needed dependencies.
|
| 108 |
+
#Pipfile.lock
|
| 109 |
+
|
| 110 |
+
# UV
|
| 111 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 112 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 113 |
+
# commonly ignored for libraries.
|
| 114 |
+
#uv.lock
|
| 115 |
+
|
| 116 |
+
# poetry
|
| 117 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 118 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 119 |
+
# commonly ignored for libraries.
|
| 120 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 121 |
+
#poetry.lock
|
| 122 |
+
|
| 123 |
+
# pdm
|
| 124 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 125 |
+
#pdm.lock
|
| 126 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 127 |
+
# in version control.
|
| 128 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 129 |
+
.pdm.toml
|
| 130 |
+
.pdm-python
|
| 131 |
+
.pdm-build/
|
| 132 |
+
|
| 133 |
+
# pixi
|
| 134 |
+
# pixi.lock should be committed to version control for reproducibility
|
| 135 |
+
# .pixi/ contains the environments and should not be committed
|
| 136 |
+
.pixi/
|
| 137 |
+
|
| 138 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 139 |
+
__pypackages__/
|
| 140 |
+
|
| 141 |
+
# Celery stuff
|
| 142 |
+
celerybeat-schedule
|
| 143 |
+
celerybeat.pid
|
| 144 |
+
|
| 145 |
+
# SageMath parsed files
|
| 146 |
+
*.sage.py
|
| 147 |
+
|
| 148 |
+
# Environments
|
| 149 |
+
.env
|
| 150 |
+
.venv
|
| 151 |
+
env/
|
| 152 |
+
venv/
|
| 153 |
+
ENV/
|
| 154 |
+
env.bak/
|
| 155 |
+
venv.bak/
|
| 156 |
+
|
| 157 |
+
# Spyder project settings
|
| 158 |
+
.spyderproject
|
| 159 |
+
.spyproject
|
| 160 |
+
|
| 161 |
+
# Rope project settings
|
| 162 |
+
.ropeproject
|
| 163 |
+
|
| 164 |
+
# mkdocs documentation
|
| 165 |
+
/site
|
| 166 |
+
|
| 167 |
+
# mypy
|
| 168 |
+
.mypy_cache/
|
| 169 |
+
.dmypy.json
|
| 170 |
+
dmypy.json
|
| 171 |
+
|
| 172 |
+
# Pyre type checker
|
| 173 |
+
.pyre/
|
| 174 |
+
|
| 175 |
+
# pytype static type analyzer
|
| 176 |
+
.pytype/
|
| 177 |
+
|
| 178 |
+
# Cython debug symbols
|
| 179 |
+
cython_debug/
|
| 180 |
+
|
| 181 |
+
# PyCharm
|
| 182 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 183 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 184 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 185 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 186 |
+
#.idea/
|
| 187 |
+
|
| 188 |
+
# Ruff stuff:
|
| 189 |
+
.ruff_cache/
|
| 190 |
+
|
| 191 |
+
# PyPI configuration file
|
| 192 |
+
.pypirc
|
hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,3 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Projet 05
|
| 3 |
emoji: đź‘€
|
|
@@ -10,3 +71,270 @@ pinned: false
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# projet_05
|
| 2 |
+
|
| 3 |
+
<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
|
| 4 |
+
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
|
| 5 |
+
</a>
|
| 6 |
+
|
| 7 |
+
Déployez un modèle de Machine Learning
|
| 8 |
+
|
| 9 |
+
## Organisation du projet
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
├── LICENSE <- Open-source license if one is chosen
|
| 13 |
+
├── Makefile <- Makefile with convenience commands like `make data` or `make train`
|
| 14 |
+
├── README.md <- The top-level README for developers using this project.
|
| 15 |
+
├── data
|
| 16 |
+
│ ├── external <- Data from third party sources.
|
| 17 |
+
│ ├── interim <- Intermediate data that has been transformed.
|
| 18 |
+
│ ├── processed <- The final, canonical data sets for modeling.
|
| 19 |
+
│ └── raw <- The original, immutable data dump.
|
| 20 |
+
│
|
| 21 |
+
├── docs <- A default mkdocs project; see www.mkdocs.org for details
|
| 22 |
+
│
|
| 23 |
+
├── models <- Trained and serialized models, model predictions, or model summaries
|
| 24 |
+
│
|
| 25 |
+
├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
|
| 26 |
+
│ the creator's initials, and a short `-` delimited description, e.g.
|
| 27 |
+
│ `1.0-jqp-initial-data-exploration`.
|
| 28 |
+
│
|
| 29 |
+
├── pyproject.toml <- Project configuration file with package metadata for
|
| 30 |
+
│ projet_05 and configuration for tools like black
|
| 31 |
+
│
|
| 32 |
+
├── references <- Data dictionaries, manuals, and all other explanatory materials.
|
| 33 |
+
│
|
| 34 |
+
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
|
| 35 |
+
│ └── figures <- Generated graphics and figures to be used in reporting
|
| 36 |
+
│
|
| 37 |
+
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
| 38 |
+
│ generated with `pip freeze > requirements.txt`
|
| 39 |
+
│
|
| 40 |
+
├── setup.cfg <- Configuration file for flake8
|
| 41 |
+
│
|
| 42 |
+
└── projet_05 <- Source code for use in this project.
|
| 43 |
+
│
|
| 44 |
+
├── __init__.py <- Makes projet_05 a Python module
|
| 45 |
+
│
|
| 46 |
+
├── config.py <- Store useful variables and configuration
|
| 47 |
+
│
|
| 48 |
+
├── dataset.py <- Scripts to download or generate data
|
| 49 |
+
│
|
| 50 |
+
├── features.py <- Code to create features for modeling
|
| 51 |
+
│
|
| 52 |
+
├── modeling
|
| 53 |
+
│ ├── __init__.py
|
| 54 |
+
│ ├── predict.py <- Code to run model inference with trained models
|
| 55 |
+
│ └── train.py <- Code to train models
|
| 56 |
+
│
|
| 57 |
+
└── plots.py <- Code to create visualizations
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
--------
|
| 61 |
+
|
| 62 |
---
|
| 63 |
title: Projet 05
|
| 64 |
emoji: đź‘€
|
|
|
|
| 71 |
---
|
| 72 |
|
| 73 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 74 |
+
|
| 75 |
+
<!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
|
| 76 |
+
<a id="readme-top"></a>
|
| 77 |
+
<!--
|
| 78 |
+
*** Thanks for checking out the Best-README-Template. If you have a suggestion
|
| 79 |
+
*** that would make this better, please fork the repo and create a pull request
|
| 80 |
+
*** or simply open an issue with the tag "enhancement".
|
| 81 |
+
*** Don't forget to give the project a star!
|
| 82 |
+
*** Thanks again! Now go create something AMAZING! :D
|
| 83 |
+
-->
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
<!-- PROJECT SHIELDS -->
|
| 88 |
+
<!--
|
| 89 |
+
*** I'm using markdown "reference style" links for readability.
|
| 90 |
+
*** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
|
| 91 |
+
*** See the bottom of this document for the declaration of the reference variables
|
| 92 |
+
*** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
|
| 93 |
+
*** https://www.markdownguide.org/basic-syntax/#reference-style-links
|
| 94 |
+
-->
|
| 95 |
+
[![Contributors][contributors-shield]][contributors-url]
|
| 96 |
+
[![Forks][forks-shield]][forks-url]
|
| 97 |
+
[![Stargazers][stars-shield]][stars-url]
|
| 98 |
+
[![Issues][issues-shield]][issues-url]
|
| 99 |
+
[![project_license][license-shield]][license-url]
|
| 100 |
+
[![LinkedIn][linkedin-shield]][linkedin-url]
|
| 101 |
+

|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
<!-- PROJECT LOGO -->
|
| 106 |
+
<br />
|
| 107 |
+
<div align="center">
|
| 108 |
+
<a href="https://github.com/github_username/repo_name">
|
| 109 |
+
<img src="images/logo.png" alt="Logo" width="80" height="80">
|
| 110 |
+
</a>
|
| 111 |
+
|
| 112 |
+
<h3 align="center">project_title</h3>
|
| 113 |
+
|
| 114 |
+
<p align="center">
|
| 115 |
+
project_description
|
| 116 |
+
<br />
|
| 117 |
+
<a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
|
| 118 |
+
<br />
|
| 119 |
+
<br />
|
| 120 |
+
<a href="https://github.com/github_username/repo_name">View Demo</a>
|
| 121 |
+
·
|
| 122 |
+
<a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
|
| 123 |
+
·
|
| 124 |
+
<a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
|
| 125 |
+
</p>
|
| 126 |
+
</div>
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
<!-- TABLE OF CONTENTS -->
|
| 131 |
+
<details>
|
| 132 |
+
<summary>Table of Contents</summary>
|
| 133 |
+
<ol>
|
| 134 |
+
<li>
|
| 135 |
+
<a href="#about-the-project">About The Project</a>
|
| 136 |
+
<ul>
|
| 137 |
+
<li><a href="#built-with">Built With</a></li>
|
| 138 |
+
</ul>
|
| 139 |
+
</li>
|
| 140 |
+
<li>
|
| 141 |
+
<a href="#getting-started">Getting Started</a>
|
| 142 |
+
<ul>
|
| 143 |
+
<li><a href="#prerequisites">Prerequisites</a></li>
|
| 144 |
+
<li><a href="#installation">Installation</a></li>
|
| 145 |
+
</ul>
|
| 146 |
+
</li>
|
| 147 |
+
<li><a href="#usage">Usage</a></li>
|
| 148 |
+
<li><a href="#roadmap">Roadmap</a></li>
|
| 149 |
+
<li><a href="#contributing">Contributing</a></li>
|
| 150 |
+
<li><a href="#license">License</a></li>
|
| 151 |
+
<li><a href="#contact">Contact</a></li>
|
| 152 |
+
<li><a href="#acknowledgments">Acknowledgments</a></li>
|
| 153 |
+
</ol>
|
| 154 |
+
</details>
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
<!-- ABOUT THE PROJECT -->
|
| 159 |
+
## About The Project
|
| 160 |
+
|
| 161 |
+
[![Product Name Screen Shot][product-screenshot]](https://example.com)
|
| 162 |
+
|
| 163 |
+
Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
|
| 164 |
+
|
| 165 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
### Built With
|
| 170 |
+
|
| 171 |
+
* [![Python][Python]][Python-url]
|
| 172 |
+
* [![SQL][SQL]][SQL-url]
|
| 173 |
+
|
| 174 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
<!-- GETTING STARTED -->
|
| 179 |
+
## Getting Started
|
| 180 |
+
|
| 181 |
+
This is an example of how you may give instructions on setting up your project locally.
|
| 182 |
+
To get a local copy up and running follow these simple example steps.
|
| 183 |
+
|
| 184 |
+
### Prerequisites
|
| 185 |
+
|
| 186 |
+
This is an example of how to list things you need to use the software and how to install them.
|
| 187 |
+
* npm
|
| 188 |
+
```sh
|
| 189 |
+
npm install npm@latest -g
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
### Installation
|
| 193 |
+
|
| 194 |
+
pip install -r requirements.txt
|
| 195 |
+
uvicorn app.main:app --reload
|
| 196 |
+
|
| 197 |
+
1. Get a free API Key at [https://example.com](https://example.com)
|
| 198 |
+
2. Clone the repo
|
| 199 |
+
```sh
|
| 200 |
+
git clone https://github.com/github_username/repo_name.git
|
| 201 |
+
```
|
| 202 |
+
3. Install NPM packages
|
| 203 |
+
```sh
|
| 204 |
+
npm install
|
| 205 |
+
```
|
| 206 |
+
4. Enter your API in `config.js`
|
| 207 |
+
```js
|
| 208 |
+
const API_KEY = 'ENTER YOUR API';
|
| 209 |
+
```
|
| 210 |
+
5. Change git remote url to avoid accidental pushes to base project
|
| 211 |
+
```sh
|
| 212 |
+
git remote set-url origin github_username/repo_name
|
| 213 |
+
git remote -v # confirm the changes
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
<!-- USAGE EXAMPLES -->
|
| 221 |
+
## Usage
|
| 222 |
+
|
| 223 |
+
Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
|
| 224 |
+
|
| 225 |
+
_For more examples, please refer to the [Documentation](https://example.com)_
|
| 226 |
+
|
| 227 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
<!-- ROADMAP -->
|
| 232 |
+
## Roadmap
|
| 233 |
+
|
| 234 |
+
- [ ] Feature 1
|
| 235 |
+
- [ ] Feature 2
|
| 236 |
+
- [ ] Feature 3
|
| 237 |
+
- [ ] Nested Feature
|
| 238 |
+
|
| 239 |
+
See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
|
| 240 |
+
|
| 241 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
<!-- CONTRIBUTING -->
|
| 246 |
+
## Contributing
|
| 247 |
+
|
| 248 |
+
Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
|
| 249 |
+
|
| 250 |
+
If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
|
| 251 |
+
Don't forget to give the project a star! Thanks again!
|
| 252 |
+
|
| 253 |
+
1. Fork the Project
|
| 254 |
+
2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
|
| 255 |
+
3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
|
| 256 |
+
4. Push to the Branch (`git push origin feature/AmazingFeature`)
|
| 257 |
+
5. Open a Pull Request
|
| 258 |
+
|
| 259 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 260 |
+
|
| 261 |
+
### Top contributors:
|
| 262 |
+
|
| 263 |
+
<a href="https://github.com/github_username/repo_name/graphs/contributors">
|
| 264 |
+
<img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
|
| 265 |
+
</a>
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
<!-- LICENSE -->
|
| 270 |
+
## License
|
| 271 |
+
|
| 272 |
+
Distributed under the project_license. See `LICENSE.txt` for more information.
|
| 273 |
+
|
| 274 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
<!-- CONTACT -->
|
| 279 |
+
## Contact
|
| 280 |
+
|
| 281 |
+
Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
|
| 282 |
+
|
| 283 |
+
Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
|
| 284 |
+
|
| 285 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
<!-- ACKNOWLEDGMENTS -->
|
| 290 |
+
## Acknowledgments
|
| 291 |
+
|
| 292 |
+
* []()
|
| 293 |
+
* []()
|
| 294 |
+
* []()
|
| 295 |
+
|
| 296 |
+
<p align="right">(<a href="#readme-top">back to top</a>)</p>
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
<!-- MARKDOWN LINKS & IMAGES -->
|
| 301 |
+
<!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
|
| 302 |
+
[contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
|
| 303 |
+
[contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
|
| 304 |
+
[forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
|
| 305 |
+
[forks-url]: https://github.com/github_username/repo_name/network/members
|
| 306 |
+
[stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
|
| 307 |
+
[stars-url]: https://github.com/github_username/repo_name/stargazers
|
| 308 |
+
[issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
|
| 309 |
+
[issues-url]: https://github.com/github_username/repo_name/issues
|
| 310 |
+
[license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
|
| 311 |
+
[license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
|
| 312 |
+
[linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
|
| 313 |
+
[linkedin-url]: https://linkedin.com/in/linkedin_username
|
| 314 |
+
[product-screenshot]: images/screenshot.png
|
| 315 |
+
[Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
|
| 316 |
+
<!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
|
| 317 |
+
[Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
|
| 318 |
+
[Next-url]: https://nextjs.org/
|
| 319 |
+
[React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
|
| 320 |
+
[React-url]: https://reactjs.org/
|
| 321 |
+
[Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
|
| 322 |
+
[Vue-url]: https://vuejs.org/
|
| 323 |
+
[Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
|
| 324 |
+
[Angular-url]: https://angular.io/
|
| 325 |
+
[Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
|
| 326 |
+
[Svelte-url]: https://svelte.dev/
|
| 327 |
+
[Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
|
| 328 |
+
[Laravel-url]: https://laravel.com
|
| 329 |
+
[Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
|
| 330 |
+
[Bootstrap-url]: https://getbootstrap.com
|
| 331 |
+
[JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
|
| 332 |
+
[JQuery-url]: https://jquery.com
|
| 333 |
+
<!-- TODO: -->
|
| 334 |
+
[](#)
|
| 335 |
+
[](#)
|
| 336 |
+
[](#)
|
| 337 |
+
[](#)
|
| 338 |
+
[](#)
|
| 339 |
+
[](#)
|
| 340 |
+
[](#)[text](../projet_04/.gitignore)
|
hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def greet(name):
|
| 4 |
+
return "Hello " + name + "!!"
|
| 5 |
+
|
| 6 |
+
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
| 7 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Déployer vers Hugging Face Spaces
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
deploy:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
steps:
|
| 13 |
+
- name: Checkout repository
|
| 14 |
+
uses: actions/checkout@v4
|
| 15 |
+
|
| 16 |
+
- name: Setup Python
|
| 17 |
+
uses: actions/setup-python@v5
|
| 18 |
+
with:
|
| 19 |
+
python-version: "3.10"
|
| 20 |
+
|
| 21 |
+
- name: Install dependencies
|
| 22 |
+
run: |
|
| 23 |
+
python -m pip install --upgrade pip
|
| 24 |
+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
| 25 |
+
|
| 26 |
+
- name: Push to Hugging Face Space
|
| 27 |
+
env:
|
| 28 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 29 |
+
run: |
|
| 30 |
+
git config --global user.email "actions@github.com"
|
| 31 |
+
git config --global user.name "GitHub Actions"
|
| 32 |
+
git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
|
| 33 |
+
rsync -av --exclude '.git' ./ hf_space/
|
| 34 |
+
cd hf_space
|
| 35 |
+
git add .
|
| 36 |
+
git commit -m "🚀 Auto-deploy from GitHub Actions"
|
| 37 |
+
git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.code-workspace
|
| 2 |
+
.venv/
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Projet 05
|
| 3 |
+
emoji: đź‘€
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.49.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def greet(name):
|
| 4 |
+
return "Hello " + name + "!!"
|
| 5 |
+
|
| 6 |
+
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
|
| 7 |
+
demo.launch()
|
hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from app.main import greet
|
| 3 |
+
|
| 4 |
+
def test_greet_returns_string():
|
| 5 |
+
"""Vérifie que la fonction retourne bien une chaîne de caractères."""
|
| 6 |
+
result = greet("Alice")
|
| 7 |
+
assert isinstance(result, str), "Le résultat doit être une chaîne de caractères."
|
| 8 |
+
|
| 9 |
+
def test_greet_output_content():
|
| 10 |
+
"""Vérifie que la fonction génère la phrase attendue."""
|
| 11 |
+
result = greet("Bob")
|
| 12 |
+
assert result == "Hello Bob!!", f"Résultat inattendu : {result}"
|
| 13 |
+
|
| 14 |
+
def test_greet_with_empty_string():
|
| 15 |
+
"""Vérifie le comportement si l’entrée est vide."""
|
| 16 |
+
result = greet("")
|
| 17 |
+
assert result == "Hello !!", "Le résultat doit gérer les entrées vides."
|
hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/poetry.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/hf_space/poetry.toml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[virtualenvs]
|
| 2 |
+
in-project = true
|
hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from projet_05 import config # noqa: F401
|
hf_space/hf_space/hf_space/hf_space/projet_05/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
from loguru import logger
|
| 5 |
+
|
| 6 |
+
# Load environment variables from .env file if it exists
|
| 7 |
+
load_dotenv()
|
| 8 |
+
|
| 9 |
+
# Paths
|
| 10 |
+
PROJ_ROOT = Path(__file__).resolve().parents[1]
|
| 11 |
+
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
|
| 12 |
+
|
| 13 |
+
DATA_DIR = PROJ_ROOT / "data"
|
| 14 |
+
RAW_DATA_DIR = DATA_DIR / "raw"
|
| 15 |
+
INTERIM_DATA_DIR = DATA_DIR / "interim"
|
| 16 |
+
PROCESSED_DATA_DIR = DATA_DIR / "processed"
|
| 17 |
+
EXTERNAL_DATA_DIR = DATA_DIR / "external"
|
| 18 |
+
|
| 19 |
+
MODELS_DIR = PROJ_ROOT / "models"
|
| 20 |
+
|
| 21 |
+
REPORTS_DIR = PROJ_ROOT / "reports"
|
| 22 |
+
FIGURES_DIR = REPORTS_DIR / "figures"
|
| 23 |
+
|
| 24 |
+
# If tqdm is installed, configure loguru with tqdm.write
|
| 25 |
+
# https://github.com/Delgan/loguru/issues/135
|
| 26 |
+
try:
|
| 27 |
+
from tqdm import tqdm
|
| 28 |
+
|
| 29 |
+
logger.remove(0)
|
| 30 |
+
logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
|
| 31 |
+
except ModuleNotFoundError:
|
| 32 |
+
pass
|
hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@app.command()
|
| 13 |
+
def main(
|
| 14 |
+
# ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
|
| 15 |
+
input_path: Path = RAW_DATA_DIR / "dataset.csv",
|
| 16 |
+
output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
|
| 17 |
+
# ----------------------------------------------
|
| 18 |
+
):
|
| 19 |
+
# ---- REPLACE THIS WITH YOUR OWN CODE ----
|
| 20 |
+
logger.info("Processing dataset...")
|
| 21 |
+
for i in tqdm(range(10), total=10):
|
| 22 |
+
if i == 5:
|
| 23 |
+
logger.info("Something happened for iteration 5.")
|
| 24 |
+
logger.success("Processing dataset complete.")
|
| 25 |
+
# -----------------------------------------
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
if __name__ == "__main__":
|
| 29 |
+
app()
|
hf_space/hf_space/hf_space/hf_space/projet_05/features.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from loguru import logger
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import typer
|
| 6 |
+
|
| 7 |
+
from projet_05.config import PROCESSED_DATA_DIR
|
| 8 |
+
|
| 9 |
+
app = typer.Typer()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@app.command()
|
| 13 |
+
def main(
|
| 14 |
+
# ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
|
| 15 |
+
input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
|
| 16 |
+
output_path: Path = PROCESSED_DATA_DIR / "features.csv",
|
| 17 |
+
# -----------------------------------------
|
| 18 |
+
):
|
| 19 |
+
# ---- REPLACE THIS WITH YOUR OWN CODE ----
|
| 20 |
+
logger.info("Generating features from dataset...")
|
| 21 |
+
for i in tqdm(range(10), total=10):
|
| 22 |
+
if i == 5:
|
| 23 |
+
logger.info("Something happened for iteration 5.")
|
| 24 |
+
logger.success("Features generation complete.")
|
| 25 |
+
# -----------------------------------------
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
if __name__ == "__main__":
|
| 29 |
+
app()
|
hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cookiecutter template stub for the inference CLI."""
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
    # -----------------------------------------
):
    """Placeholder command: simulates an inference run."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Performing inference for model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Inference complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cookiecutter template stub for the training CLI."""
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "features.csv",
    labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    # -----------------------------------------
):
    """Placeholder command: simulates a training run."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Training some model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Modeling training complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/projet_05/plots.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cookiecutter template stub for the plotting CLI."""
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import FIGURES_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = FIGURES_DIR / "plot.png",
    # -----------------------------------------
):
    """Placeholder command: simulates a plot-generation run."""
    # ---- REPLACE THIS WITH YOUR OWN CODE ----
    logger.info("Generating plot from data...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Plot generation complete.")
    # -----------------------------------------


if __name__ == "__main__":
    app()
|
hf_space/hf_space/hf_space/hf_space/pyproject.toml
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["flit_core >=3.2,<4"]
|
| 3 |
+
build-backend = "flit_core.buildapi"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "projet_05"
|
| 7 |
+
version = "0.0.1"
|
| 8 |
+
description = "D\u00e9ployez un mod\u00e8le de Machine Learning"
|
| 9 |
+
authors = [
|
| 10 |
+
{ name = "St\u00e9phane Manet" },
|
| 11 |
+
]
|
| 12 |
+
license = { file = "LICENSE" }
|
| 13 |
+
readme = "README.md"
|
| 14 |
+
classifiers = [
|
| 15 |
+
"Programming Language :: Python :: 3",
|
| 16 |
+
"License :: OSI Approved :: MIT License"
|
| 17 |
+
]
|
| 18 |
+
dependencies = [
|
| 19 |
+
"loguru",
|
| 20 |
+
"mkdocs",
|
| 21 |
+
"pip",
|
| 22 |
+
"pytest",
|
| 23 |
+
"python-dotenv",
|
| 24 |
+
"ruff",
|
| 25 |
+
"tqdm",
|
| 26 |
+
"typer",
|
| 27 |
+
"imbalanced-learn (>=0.14.0,<0.15.0)",
|
| 28 |
+
"scikit-learn (>=1.4.2,<2.0.0)",
|
| 29 |
+
"matplotlib (>=3.10.7,<4.0.0)",
|
| 30 |
+
"numpy (>=2.3.4,<3.0.0)",
|
| 31 |
+
"pandas (>=2.3.3,<3.0.0)",
|
| 32 |
+
"pyyaml (>=6.0.3,<7.0.0)",
|
| 33 |
+
"scipy (>=1.16.3,<2.0.0)",
|
| 34 |
+
"seaborn (>=0.13.2,<0.14.0)",
|
| 35 |
+
"shap (>=0.49.1,<0.50.0)",
|
| 36 |
+
"gradio (>=5.49.1,<6.0.0)",
|
| 37 |
+
"joblib (>=1.4.2,<2.0.0)"
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
requires-python = ">=3.11,<3.13"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
[tool.ruff]
|
| 44 |
+
line-length = 99
|
| 45 |
+
src = ["projet_05"]
|
| 46 |
+
include = ["pyproject.toml", "projet_05/**/*.py"]
|
| 47 |
+
|
| 48 |
+
[tool.ruff.lint]
|
| 49 |
+
extend-select = ["I"] # Add import sorting
|
| 50 |
+
|
| 51 |
+
[tool.ruff.lint.isort]
|
| 52 |
+
known-first-party = ["projet_05"]
|
| 53 |
+
force-sort-within-sections = true
|
hf_space/hf_space/hf_space/hf_space/references/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/reports/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/hf_space/tests/test_data.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def test_code_is_tested():
|
| 5 |
+
assert False
|
hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_space/hf_space/hf_space/projet_05/__init__.py
CHANGED
|
@@ -1 +1,4 @@
|
|
| 1 |
from projet_05 import config # noqa: F401
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from projet_05 import config # noqa: F401
|
| 2 |
+
from projet_05.settings import Settings, load_settings # noqa: F401
|
| 3 |
+
|
| 4 |
+
__all__ = ["config", "Settings", "load_settings"]
|
hf_space/hf_space/hf_space/projet_05/branding.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from functools import lru_cache
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Union
|
| 6 |
+
|
| 7 |
+
from scripts_projet04.brand.brand import ( # type: ignore[import-not-found]
|
| 8 |
+
Theme,
|
| 9 |
+
ThemeConfig,
|
| 10 |
+
configure_brand,
|
| 11 |
+
load_brand,
|
| 12 |
+
make_diverging_cmap,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
ROOT_DIR = Path(__file__).resolve().parents[1]
|
| 16 |
+
DEFAULT_BRAND_PATH = ROOT_DIR / "scripts_projet04" / "brand" / "brand.yml"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _resolve_path(path: Union[str, Path, None]) -> Path:
|
| 20 |
+
if path is None:
|
| 21 |
+
return DEFAULT_BRAND_PATH
|
| 22 |
+
return Path(path).expanduser().resolve()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@lru_cache(maxsize=1)
def load_brand_config(path: Union[str, Path, None] = None) -> ThemeConfig:
    """Parse the brand YAML a single time and hand back the ThemeConfig."""
    # lru_cache(maxsize=1) keeps the parsed config for repeated callers.
    return load_brand(_resolve_path(path))
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@lru_cache(maxsize=1)
def apply_brand_theme(path: Union[str, Path, None] = None) -> ThemeConfig:
    """
    Apply the OpenClassrooms/TechNova brand theme globally.

    Returns the ThemeConfig so callers can inspect colors if needed.
    """
    brand_cfg = configure_brand(_resolve_path(path))
    Theme.apply()
    return brand_cfg
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
__all__ = [
|
| 46 |
+
"Theme",
|
| 47 |
+
"ThemeConfig",
|
| 48 |
+
"apply_brand_theme",
|
| 49 |
+
"load_brand_config",
|
| 50 |
+
"make_diverging_cmap",
|
| 51 |
+
"DEFAULT_BRAND_PATH",
|
| 52 |
+
]
|
hf_space/hf_space/hf_space/projet_05/dataset.py
CHANGED
|
@@ -1,28 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
|
|
|
|
|
|
| 3 |
from loguru import logger
|
| 4 |
-
from tqdm import tqdm
|
| 5 |
import typer
|
| 6 |
|
| 7 |
-
from projet_05.config import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@app.command()
|
| 13 |
def main(
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
):
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
logger.success("Processing dataset complete.")
|
| 25 |
-
# -----------------------------------------
|
| 26 |
|
| 27 |
|
| 28 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sqlite3
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
from loguru import logger
|
|
|
|
| 9 |
import typer
|
| 10 |
|
| 11 |
+
from projet_05.config import INTERIM_DATA_DIR
|
| 12 |
+
from projet_05.settings import Settings, load_settings
|
| 13 |
+
|
| 14 |
+
app = typer.Typer(help="Préparation et fusion des données sources.")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ---------------------------------------------------------------------------
|
| 18 |
+
# Utilitaires
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
def safe_read_csv(path: Path, *, dtype=None) -> pd.DataFrame:
    """Read a CSV file, degrading to an empty DataFrame on any failure."""
    try:
        logger.info("Lecture du fichier {}", path)
        frame = pd.read_csv(path, dtype=dtype)
    except FileNotFoundError:
        # A missing source file is expected on partial checkouts: warn only.
        logger.warning("Fichier absent: {}", path)
        frame = pd.DataFrame()
    except Exception as exc:  # pragma: no cover - log + empty dataframe
        logger.error("Impossible de lire {} ({})", path, exc)
        frame = pd.DataFrame()
    return frame
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def clean_text_values(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize textual values that often materialize missing values."""
    # Tokens the raw exports use to encode "unknown"; every one becomes NaN.
    replace_tokens = [
        "",
        " ",
        "  ",
        "   ",
        "nan",
        "NaN",
        "NAN",
        "None",
        "JE ne sais pas",
        "je ne sais pas",
        "Je ne sais pas",
        "Unknow",
        "Unknown",
        "non pertinent",
        "Non pertinent",
        "NON PERTINENT",
    ]
    # DataFrame.replace returns a new frame, so the input is never mutated.
    cleaned = df.replace(replace_tokens, np.nan)

    # Text columns are additionally trimmed and moved to the "string" dtype.
    for text_col in cleaned.select_dtypes(include="object"):
        cleaned[text_col] = (
            cleaned[text_col].replace(replace_tokens, np.nan).astype("string").str.strip()
        )
    return cleaned
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _harmonize_id_column(df: pd.DataFrame, column: str, *, digits_only: bool = True) -> pd.DataFrame:
|
| 64 |
+
data = df.copy()
|
| 65 |
+
if column not in data.columns:
|
| 66 |
+
return data
|
| 67 |
+
|
| 68 |
+
if digits_only:
|
| 69 |
+
extracted = data[column].astype(str).str.extract(r"(\\d+)")
|
| 70 |
+
data[column] = pd.to_numeric(extracted[0], errors="coerce")
|
| 71 |
+
data[column] = pd.to_numeric(data[column], errors="coerce").astype("Int64")
|
| 72 |
+
return data
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _rename_column(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame:
|
| 76 |
+
if source not in df.columns:
|
| 77 |
+
return df
|
| 78 |
+
return df.rename(columns={source: target})
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _log_id_diagnostics(df: pd.DataFrame, *, name: str, col_id: str) -> None:
    """Log row count, unique identifiers and duplicate count for one source."""
    if col_id not in df.columns:
        logger.warning("La colonne {} est absente du fichier {}.", col_id, name)
        return
    row_count = len(df)
    unique_ids = df[col_id].nunique(dropna=True)
    logger.info(
        "{name}: {total} lignes | {uniques} identifiants uniques | {duplicates} doublons",
        name=name,
        total=row_count,
        uniques=unique_ids,
        duplicates=row_count - unique_ids,
    )
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _persist_sql_trace(df_dict: dict[str, pd.DataFrame], settings: Settings) -> pd.DataFrame:
    """
    Reproduce the SQL merge described in the notebook.

    Each DataFrame is stored in a throwaway SQLite database so the exact
    query that produced the merge is kept as an auditable trace.
    """
    # Output locations come from the project settings; the parent data
    # directories may not exist yet on a fresh checkout.
    db_path = settings.db_file
    sql_path = settings.sql_file

    db_path.parent.mkdir(parents=True, exist_ok=True)
    sql_path.parent.mkdir(parents=True, exist_ok=True)

    # Start from a clean database on every run.
    if db_path.exists():
        db_path.unlink()

    # NOTE(review): settings.col_id is interpolated as a column identifier
    # (cannot be a bound parameter); it comes from project config, not user
    # input — confirm it is never attacker-controlled.
    query = f"""
    SELECT *
    FROM sirh
    INNER JOIN evaluation USING ({settings.col_id})
    INNER JOIN sond USING ({settings.col_id});
    """.strip()

    with db_path.open("wb") as _:
        pass  # just ensure the file exists for sqlite on some platforms

    # NOTE(review): sqlite3's context manager commits/rolls back but does not
    # close the connection — consider contextlib.closing if handles leak.
    with sqlite3.connect(db_path) as conn:
        for name, frame in df_dict.items():
            frame.to_sql(name, conn, index=False, if_exists="replace")
        merged = pd.read_sql_query(query, conn)

    # Persist the executed query next to the database for traceability.
    sql_path.write_text(query, encoding="utf-8")
    return merged
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def build_dataset(settings: Settings) -> pd.DataFrame:
    """Load, clean, harmonize and merge the three raw sources.

    Each source CSV is read defensively, its identifier column is coerced
    to a numeric id, and text noise is normalized; the three frames are
    then merged through the SQLite trace helper.

    Raises
    ------
    KeyError
        If the identifier column is missing from the merged result.
    """
    sirh = clean_text_values(
        safe_read_csv(settings.path_sirh).pipe(
            _harmonize_id_column, settings.col_id, digits_only=True
        )
    )
    evaluation = clean_text_values(
        safe_read_csv(settings.path_eval)
        .pipe(_rename_column, "eval_number", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )
    sond = clean_text_values(
        safe_read_csv(settings.path_sondage)
        .pipe(_rename_column, "code_sondage", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )

    # Build the name -> frame mapping once: the original built this dict
    # twice (once for diagnostics, once for the merge).
    frames = {"sirh": sirh, "evaluation": evaluation, "sond": sond}
    for name, frame in frames.items():
        _log_id_diagnostics(frame, name=name, col_id=settings.col_id)

    merged = _persist_sql_trace(frames, settings)

    # Simplified from the original list-then-truthiness dance; the check is
    # a single membership test.
    if settings.col_id not in merged.columns:
        raise KeyError(
            f"La colonne {settings.col_id} est absente de la fusion finale. "
            "Vérifiez vos fichiers sources."
        )

    logger.success("Fusion réalisée: {} lignes / {} colonnes", *merged.shape)
    return merged
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def save_dataset(df: pd.DataFrame, output_path: Path) -> None:
    """Write the merged dataset to CSV, creating parent directories as needed."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Fichier fusionné sauvegardé dans {}", output_path)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ---------------------------------------------------------------------------
|
| 178 |
+
# CLI
|
| 179 |
+
# ---------------------------------------------------------------------------
|
| 180 |
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin vers un fichier settings.yml personnalisé.",
    ),
    output_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--output",
        "-o",
        help="Chemin de sortie du dataset fusionné.",
    ),
):
    """Entrypoint Typer pour reproduire la fusion des données brutes."""

    # A user-supplied settings file overrides the packaged defaults.
    settings = load_settings(settings_path) if settings_path else load_settings()
    df = build_dataset(settings)
    save_dataset(df, output_path)
|
|
|
|
|
|
|
| 200 |
|
| 201 |
|
| 202 |
if __name__ == "__main__":
|
hf_space/hf_space/hf_space/projet_05/explainability.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from loguru import logger
|
| 9 |
+
|
| 10 |
+
from projet_05.branding import Theme, apply_brand_theme, make_diverging_cmap
|
| 11 |
+
from scripts_projet04.manet_projet04.shap_generator import ( # type: ignore[import-not-found]
|
| 12 |
+
shap_global,
|
| 13 |
+
shap_local,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
apply_brand_theme()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def _shape_array(values) -> np.ndarray:
|
| 20 |
+
if hasattr(values, "values"):
|
| 21 |
+
arr = np.array(values.values)
|
| 22 |
+
else:
|
| 23 |
+
arr = np.array(values)
|
| 24 |
+
return np.nan_to_num(arr, copy=False)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def compute_shap_summary(
    pipeline,
    X: pd.DataFrame,
    y: pd.Series,
    *,
    max_samples: int = 500,
) -> Tuple[pd.DataFrame | None, object | None]:
    """
    Reuse the historical ``shap_global`` helper to build the plots and a
    tabular summary.

    Returns
    -------
    summary_df : pd.DataFrame | None
        Mean absolute SHAP value per feature, sorted descending.
    shap_values : shap.Explanation | None
        Object returned by shap_global, for later local analyses.
    """
    # Brand-colored diverging colormap for the SHAP plots.
    cmap = make_diverging_cmap(Theme.PRIMARY, Theme.SECONDARY)
    # shap_global both renders the global plots and returns the raw values.
    shap_values, _, feature_names = shap_global(
        pipeline,
        X,
        y,
        sample_size=max_samples,
        cmap=cmap,
    )
    if shap_values is None or feature_names is None:
        logger.warning("Impossible de générer les résumés SHAP.")
        return None, None

    # Normalize to a 2-D (samples x features) array before aggregating.
    shap_array = _shape_array(shap_values)
    if shap_array.ndim == 1:
        shap_array = shap_array.reshape(-1, 1)
    mean_abs = np.abs(shap_array).mean(axis=0)
    summary = (
        pd.DataFrame({"feature": list(feature_names), "mean_abs_shap": mean_abs})
        .sort_values("mean_abs_shap", ascending=False)
        .reset_index(drop=True)
    )
    return summary, shap_values
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def save_shap_summary(summary: pd.DataFrame, output_path: Path) -> None:
    """Persist the SHAP mean-|value| summary table as CSV."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(output_path, index=False)
    logger.info("Résumé SHAP sauvegardé dans {}", output_path)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def export_local_explanations(
    pipeline,
    shap_values,
    X: pd.DataFrame,
    custom_index: int | None = None,
) -> None:
    """
    Generate three default local-explanation cases (max impact, max risk,
    min risk) plus an optional custom index for the historical trace.
    """
    if shap_values is None:
        return

    shap_array = _shape_array(shap_values)
    # Sample with the largest total |SHAP| contribution across features.
    idx_impact = int(np.argmax(np.sum(np.abs(shap_array), axis=1)))
    shap_local(idx_impact, shap_values)

    # assumes predict_proba column 1 is the positive ("departure") class —
    # TODO confirm against the trained pipeline's class ordering.
    y_proba_all = pipeline.predict_proba(X)[:, 1]
    idx_highrisk = int(np.argmax(y_proba_all))
    shap_local(idx_highrisk, shap_values)

    idx_lowrisk = int(np.argmin(y_proba_all))
    shap_local(idx_lowrisk, shap_values, text_scale=0.6)

    if custom_index is not None:
        shap_local(custom_index, shap_values, max_display=8)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
__all__ = ["compute_shap_summary", "save_shap_summary", "export_local_explanations"]
|
hf_space/hf_space/hf_space/projet_05/features.py
CHANGED
|
@@ -1,28 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
|
|
|
|
|
|
| 3 |
from loguru import logger
|
| 4 |
-
from tqdm import tqdm
|
| 5 |
import typer
|
| 6 |
|
| 7 |
-
from projet_05.config import PROCESSED_DATA_DIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
@app.command()
|
| 13 |
def main(
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
):
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
# -----------------------------------------
|
| 26 |
|
| 27 |
|
| 28 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from datetime import datetime
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
from loguru import logger
|
|
|
|
| 10 |
import typer
|
| 11 |
|
| 12 |
+
from projet_05.config import INTERIM_DATA_DIR, PROCESSED_DATA_DIR
|
| 13 |
+
from projet_05.settings import Settings, load_settings
|
| 14 |
+
|
| 15 |
+
app = typer.Typer(help="Génération des features et nettoyage de la cible.")
|
| 16 |
+
|
| 17 |
+
# Accepted raw spellings of the binary target, mapped to 0/1.  Lookups in
# _normalize_target apply str.strip().lower() first, so only lowercase keys
# ever match — keep new entries lowercase.
TARGET_MAPPING = {
    "1": 1,
    "0": 0,
    "oui": 1,
    "non": 0,
    "true": 1,
    "false": 0,
    "quitte": 1,
    "reste": 0,
    "yes": 1,
    "no": 0,
}
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
# Utilitaires cœur de pipeline
|
| 33 |
+
# ---------------------------------------------------------------------------
|
| 34 |
+
def _load_merged_dataset(path: Path) -> pd.DataFrame:
    """Load the merged CSV produced by ``dataset.py``; fail fast when absent."""
    if not path.exists():
        raise FileNotFoundError(
            f"Le fichier fusionné {path} est introuvable. Lancez `python projet_05/dataset.py` d'abord."
        )
    logger.info("Chargement du dataset fusionné depuis {}", path)
    return pd.read_csv(path)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _normalize_target(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
    """Map raw target labels to {0, 1} and drop rows whose label is invalid."""
    if settings.target not in df.columns:
        raise KeyError(f"La variable cible '{settings.target}' est absente du fichier.")

    df = df.copy()
    # Canonicalize the raw labels, then translate via TARGET_MAPPING;
    # anything unmapped becomes NaN and is discarded below.
    labels = df[settings.target].astype(str).str.strip().str.lower()
    df[settings.target] = labels.map(TARGET_MAPPING)

    before = len(df)
    df = df[df[settings.target].isin([0, 1])].copy()
    dropped = before - len(df)
    if dropped:
        logger.warning("Suppression de {} lignes avec une cible invalide.", dropped)
    df[settings.target] = df[settings.target].astype(int)
    return df
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _safe_ratio(df: pd.DataFrame, numerator: str, denominator: str, output: str) -> None:
|
| 66 |
+
if numerator not in df.columns or denominator not in df.columns:
|
| 67 |
+
return
|
| 68 |
+
denominator_series = df[denominator].replace({0: np.nan})
|
| 69 |
+
df[output] = df[numerator] / denominator_series
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _engineer_features(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
    """Derive ratio, satisfaction and evaluation-delta features on a copy of *df*."""
    engineered = df.copy()

    # NOTE: "augementation" is the actual (misspelled) column name in the raw
    # data — do not "fix" the spelling here.
    col = "augementation_salaire_precedente"
    if col in engineered:
        # Values arrive as percentage strings ("12,5 %"); normalize to a
        # 0-1 float fraction.
        engineered[col] = (
            engineered[col]
            .astype(str)
            .str.replace("%", "", regex=False)
            .str.replace(",", ".", regex=False)
            .str.strip()
        )
        engineered[col] = pd.to_numeric(engineered[col], errors="coerce") / 100

    # Ratio features; _safe_ratio is a no-op when a column is missing.
    _safe_ratio(engineered, "augementation_salaire_precedente", "revenu_mensuel", "augmentation_par_revenu")
    _safe_ratio(engineered, "annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
    _safe_ratio(engineered, "nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
    _safe_ratio(
        engineered, "annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience"
    )

    # Mean satisfaction over whichever configured columns are present.
    if settings.sat_cols:
        existing = [col for col in settings.sat_cols if col in engineered.columns]
        if existing:
            engineered["score_moyen_satisfaction"] = engineered[existing].mean(axis=1)

    # Evaluation trend: current minus previous score.
    if "note_evaluation_actuelle" in engineered.columns and "note_evaluation_precedente" in engineered.columns:
        engineered["evolution_note"] = (
            engineered["note_evaluation_actuelle"] - engineered["note_evaluation_precedente"]
        )

    return engineered
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def build_features(settings: Settings, *, input_path: Path) -> pd.DataFrame:
    """Run the feature pipeline: load merged data, clean the target, engineer features."""
    df = _load_merged_dataset(input_path)
    df = _normalize_target(df, settings)
    df = _engineer_features(df, settings)
    return df
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def save_features(df: pd.DataFrame, output_path: Path) -> None:
    """Write the enriched dataset to CSV, creating parent directories as needed."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Dataset enrichi sauvegardé dans {}", output_path)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def save_schema(settings: Settings, output_path: Path) -> None:
    """Persist the feature schema (target, id, feature lists) as JSON.

    The ``generated_at`` timestamp is timezone-aware UTC: the previous
    ``datetime.utcnow()`` is deprecated since Python 3.12 and produced a
    naive, ambiguous timestamp.
    """
    # Local import: the module top imports only `datetime` from the package.
    from datetime import timezone

    schema = {
        "target": settings.target,
        "col_id": settings.col_id,
        "numerical_features": list(settings.num_cols),
        "categorical_features": list(settings.cat_cols),
        "satisfaction_features": list(settings.sat_cols),
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(schema, indent=2), encoding="utf-8")
    logger.info("Schéma sauvegardé dans {}", output_path)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# ---------------------------------------------------------------------------
|
| 134 |
+
# CLI
|
| 135 |
+
# ---------------------------------------------------------------------------
|
| 136 |
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin optionnel vers un fichier settings.yml personnalisé.",
    ),
    input_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--input",
        "-i",
        help="Chemin du fichier issu de la fusion.",
    ),
    output_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "dataset.csv",
        "--output",
        "-o",
        help="Chemin du fichier enrichi.",
    ),
    schema_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "schema.json",
        "--schema",
        help="Chemin de sauvegarde du schéma de features.",
    ),
):
    """Pipeline Typer pour préparer le dataset enrichi."""

    # A user-supplied settings file overrides the packaged defaults.
    settings = load_settings(settings_path) if settings_path else load_settings()
    df = build_features(settings, input_path=input_path)
    save_features(df, output_path)
    save_schema(settings, schema_path)
|
|
|
|
| 168 |
|
| 169 |
|
| 170 |
if __name__ == "__main__":
|
hf_space/hf_space/hf_space/projet_05/modeling/predict.py
CHANGED
|
@@ -1,29 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
|
|
|
|
|
|
|
|
|
|
| 3 |
from loguru import logger
|
| 4 |
-
from tqdm import tqdm
|
| 5 |
import typer
|
| 6 |
|
| 7 |
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
|
| 8 |
|
| 9 |
-
app = typer.Typer()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
@app.command()
|
| 13 |
def main(
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
):
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
from pathlib import Path
|
| 5 |
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from joblib import load
|
| 9 |
from loguru import logger
|
|
|
|
| 10 |
import typer
|
| 11 |
|
| 12 |
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
|
| 13 |
|
| 14 |
+
app = typer.Typer(help="Inférence à partir du pipeline entraîné.")
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def load_pipeline(model_path: Path):
    """Load the trained pipeline persisted by train.py.

    Raises FileNotFoundError when the artifact is missing so the CLI fails
    fast instead of crashing deeper inside joblib.
    """
    if model_path.exists():
        logger.info("Chargement du modèle {}", model_path)
        return load(model_path)
    raise FileNotFoundError(f"Modèle introuvable: {model_path}")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def load_metadata(metadata_path: Path) -> dict:
    """Read the JSON metadata (threshold, feature lists) written by train.py."""
    if metadata_path.exists():
        return json.loads(metadata_path.read_text(encoding="utf-8"))
    raise FileNotFoundError(f"Fichier métadonnées introuvable: {metadata_path}")
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def run_inference(
    df: pd.DataFrame,
    pipeline,
    threshold: float,
    drop_columns: list[str] | None = None,
    required_features: list[str] | None = None,
) -> pd.DataFrame:
    """Score `df` with the fitted pipeline and append probability/label columns.

    `drop_columns` are removed before scoring (e.g. the target if present).
    `required_features`, when given, fixes the exact column set and order the
    pipeline was trained on, filling any missing column with NaN.
    Returns a copy of `df` with `proba_depart` and `prediction` added.
    """
    features = df.drop(columns=drop_columns or [], errors="ignore")
    if required_features:
        # reindex inserts missing columns as NaN and enforces training order.
        features = features.reindex(columns=required_features)
    proba = pipeline.predict_proba(features)[:, 1]
    scored = df.copy()
    scored["proba_depart"] = proba
    scored["prediction"] = (proba >= threshold).astype(int)
    return scored
|
| 49 |
|
| 50 |
|
| 51 |
@app.command()
def main(
    model_path: Path = typer.Option(
        MODELS_DIR / "best_model.joblib",
        "--model-path",
        help="Pipeline entraîné sauvegardé via train.py",
    ),
    metadata_path: Path = typer.Option(
        MODELS_DIR / "best_model_meta.json",
        "--metadata-path",
        help="Fichier JSON contenant le seuil optimal.",
    ),
    features_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "dataset.csv",
        "--features",
        "-f",
        help="Jeu de features sur lequel produire des prédictions.",
    ),
    predictions_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "predictions.csv",
        "--output",
        "-o",
        help="Chemin de sauvegarde des prédictions.",
    ),
):
    """Entrypoint Typer pour générer un fichier de prédictions."""

    pipeline = load_pipeline(model_path)
    metadata = load_metadata(metadata_path)

    # Rebuild the exact feature contract the model was trained with.
    features_cfg = metadata.get("features", {})
    required_features = (features_cfg.get("numerical") or []) + (
        features_cfg.get("categorical") or []
    )
    target_col = metadata.get("target")

    df = pd.read_csv(features_path)
    logger.info("Dataset chargé: {} lignes", len(df))

    predictions = run_inference(
        df,
        pipeline,
        metadata.get("best_threshold", 0.5),
        drop_columns=[target_col] if target_col else None,
        required_features=required_features or None,
    )
    predictions_path.parent.mkdir(parents=True, exist_ok=True)
    predictions.to_csv(predictions_path, index=False)
    logger.success("Prédictions sauvegardées dans {}", predictions_path)
|
| 97 |
|
| 98 |
|
| 99 |
if __name__ == "__main__":
|
hf_space/hf_space/hf_space/projet_05/modeling/train.py
CHANGED
|
@@ -1,29 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
|
|
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from loguru import logger
|
| 4 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
import typer
|
| 6 |
|
| 7 |
-
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
@app.command()
|
| 13 |
def main(
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
):
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
from pathlib import Path
|
| 6 |
+
from typing import Dict, Tuple
|
| 7 |
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from imblearn.over_sampling import SMOTE
|
| 11 |
+
from imblearn.pipeline import Pipeline as ImbPipeline
|
| 12 |
+
from joblib import dump
|
| 13 |
from loguru import logger
|
| 14 |
+
from sklearn.base import clone
|
| 15 |
+
from sklearn.compose import ColumnTransformer
|
| 16 |
+
from sklearn.ensemble import RandomForestClassifier
|
| 17 |
+
from sklearn.impute import SimpleImputer
|
| 18 |
+
from sklearn.linear_model import LogisticRegression
|
| 19 |
+
from sklearn.metrics import (
|
| 20 |
+
f1_score,
|
| 21 |
+
precision_recall_curve,
|
| 22 |
+
precision_score,
|
| 23 |
+
recall_score,
|
| 24 |
+
roc_auc_score,
|
| 25 |
+
)
|
| 26 |
+
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_predict
|
| 27 |
+
from sklearn.pipeline import Pipeline
|
| 28 |
+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
| 29 |
import typer
|
| 30 |
|
| 31 |
+
from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR, REPORTS_DIR
|
| 32 |
+
from projet_05.explainability import (
|
| 33 |
+
compute_shap_summary,
|
| 34 |
+
export_local_explanations,
|
| 35 |
+
save_shap_summary,
|
| 36 |
+
)
|
| 37 |
+
from projet_05.settings import Settings, load_settings
|
| 38 |
+
|
| 39 |
+
app = typer.Typer(help="Entraînement et sélection du meilleur modèle.")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _clean_values(payload: dict) -> dict:
|
| 43 |
+
def _convert(value):
|
| 44 |
+
if isinstance(value, (np.floating, np.integer)):
|
| 45 |
+
return value.item()
|
| 46 |
+
return value
|
| 47 |
+
|
| 48 |
+
return {key: _convert(value) for key, value in payload.items()}
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@dataclass
class ModelResult:
    """Outcome of one GridSearchCV run for a single candidate model."""

    # Human-readable model key from get_models() (e.g. "LogReg_balanced").
    name: str
    # Best prep -> SMOTE -> clf pipeline found by the grid search.
    best_estimator: ImbPipeline
    # Hyper-parameters selected by GridSearchCV (clf__* keys).
    best_params: dict
    # Decision threshold maximising F1 on out-of-fold probabilities.
    best_threshold: float
    # Cross-validated scores: f1, recall, precision, roc_auc.
    metrics: Dict[str, float]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def load_processed_dataset(path: Path) -> pd.DataFrame:
    """Read the enriched CSV produced by features.py, failing fast when absent."""
    if path.exists():
        logger.info("Chargement du dataset préparé depuis {}", path)
        return pd.read_csv(path)
    raise FileNotFoundError(
        f"Dataset traité introuvable ({path}). Lancez `python projet_05/features.py`."
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def split_features_target(df: pd.DataFrame, settings: Settings) -> Tuple[pd.DataFrame, pd.Series]:
    """Split the processed frame into features X and integer target y.

    The configured id column is dropped from X when present; raises KeyError
    when the configured target column is missing.
    """
    if settings.target not in df.columns:
        raise KeyError(f"La cible {settings.target} est absente du dataset.")
    y = df[settings.target].astype(int)
    to_drop = [settings.target] + (
        [settings.col_id] if settings.col_id in df.columns else []
    )
    return df.drop(columns=to_drop, errors="ignore"), y
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def build_preprocessor(settings: Settings, X: pd.DataFrame) -> ColumnTransformer:
    """Assemble the ColumnTransformer for numeric and categorical columns.

    Column lists come from the settings filtered to what actually exists in
    X; when a configured list matches nothing, falls back to dtype-based
    detection. Raises ValueError when no usable feature remains.
    """
    numeric = [col for col in settings.num_cols if col in X.columns]
    if not numeric:
        numeric = X.select_dtypes(include="number").columns.tolist()
    categorical = [col for col in settings.cat_cols if col in X.columns]
    if not categorical:
        categorical = X.select_dtypes(exclude="number").columns.tolist()

    transformers = []
    if numeric:
        num_pipe = Pipeline(
            steps=[
                ("imputer", SimpleImputer(strategy="median")),
                ("scaler", StandardScaler()),
            ]
        )
        transformers.append(("num", num_pipe, numeric))
    if categorical:
        cat_pipe = Pipeline(
            steps=[
                ("imputer", SimpleImputer(strategy="most_frequent")),
                ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
            ]
        )
        transformers.append(("cat", cat_pipe, categorical))
    if not transformers:
        raise ValueError("Aucune feature disponible pour l'entraînement.")
    return ColumnTransformer(transformers=transformers)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def get_models(random_state: int):
    """Return the candidate models with their GridSearchCV parameter grids.

    Maps a model name to a (estimator, param_grid) pair. The grid keys use
    the "clf__" prefix to address the classifier step of the prep -> smote
    -> clf pipeline built in evaluate_models(). Both models use balanced
    class weights, complementing the SMOTE resampling in the pipeline.
    """
    return {
        "LogReg_balanced": (
            LogisticRegression(
                max_iter=2000,
                class_weight="balanced",
                random_state=random_state,
            ),
            # Two sub-grids because solver/penalty support differs:
            # lbfgs only handles l2, liblinear handles l1 and l2.
            [
                {
                    "clf__solver": ["lbfgs"],
                    "clf__penalty": ["l2"],
                    "clf__C": [0.1, 1.0, 10.0],
                },
                {
                    "clf__solver": ["liblinear"],
                    "clf__penalty": ["l1", "l2"],
                    "clf__C": [0.1, 1.0, 10.0],
                },
            ],
        ),
        "RF_balanced": (
            RandomForestClassifier(
                n_estimators=300,
                max_depth=8,
                min_samples_split=10,
                min_samples_leaf=5,
                class_weight="balanced_subsample",
                random_state=random_state,
            ),
            {
                "clf__n_estimators": [200, 300, 500],
                "clf__max_depth": [6, 8, 10],
                "clf__min_samples_split": [5, 10, 15],
                "clf__min_samples_leaf": [2, 5, 8],
            },
        ),
    }
|
| 148 |
+
|
| 149 |
|
| 150 |
+
def _compute_best_threshold(y_true, y_proba):
    """Return the probability threshold that maximises F1 on (y_true, y_proba).

    precision_recall_curve returns len(thresholds) + 1 precision/recall
    points; the final point has no associated threshold, so it is excluded
    before taking the argmax. Falls back to 0.5 when no threshold exists.

    Fixes two defects of the previous version: the emptiness guard ran
    AFTER np.nanargmax (so it could never prevent the failure it guarded
    against), and the `min(best_idx, size - 1)` clamp silently returned the
    wrong threshold whenever the thresholdless final point had the top F1.
    """
    precision, recall, thresholds = precision_recall_curve(y_true, y_proba)
    if thresholds.size == 0:
        # Degenerate input (e.g. a single class): keep the neutral default.
        return 0.5
    # Drop the last precision/recall pair, which corresponds to no threshold.
    f1_scores = 2 * (precision[:-1] * recall[:-1]) / (
        precision[:-1] + recall[:-1] + 1e-8
    )
    return thresholds[np.nanargmax(f1_scores)]
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def evaluate_models(X, y, settings: Settings, preprocessor: ColumnTransformer) -> list[ModelResult]:
    """Grid-search every candidate model and score it on out-of-fold predictions.

    For each model from get_models():
    1. wrap it in a prep -> SMOTE -> clf imblearn pipeline, so resampling is
       re-fit inside each CV split instead of on the full dataset;
    2. select hyper-parameters with GridSearchCV (scoring="f1", 5-fold
       stratified CV);
    3. recompute out-of-fold probabilities with cross_val_predict, derive
       the F1-optimal threshold, and report f1/recall/precision/roc_auc.

    NOTE(review): the threshold is tuned on the same out-of-fold
    probabilities used for the reported metrics, so scores are slightly
    optimistic — confirm this is acceptable for model comparison.
    """
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=settings.random_state)
    results: list[ModelResult] = []

    for name, (model, grid) in get_models(settings.random_state).items():
        logger.info("Entraînement du modèle {}", name)
        pipe = ImbPipeline(
            steps=[
                ("prep", preprocessor),
                ("smote", SMOTE(random_state=settings.random_state)),
                ("clf", model),
            ]
        )
        search = GridSearchCV(
            estimator=pipe,
            param_grid=grid,
            cv=cv,
            scoring="f1",
            n_jobs=-1,  # parallelise the grid search over all cores
        )
        search.fit(X, y)
        best_pipe = search.best_estimator_

        # Fresh out-of-fold probabilities for the threshold search and metrics.
        y_proba = cross_val_predict(best_pipe, X, y, cv=cv, method="predict_proba")[:, 1]
        threshold = _compute_best_threshold(y, y_proba)
        y_pred = (y_proba >= threshold).astype(int)

        metrics = {
            "f1": f1_score(y, y_pred),
            "recall": recall_score(y, y_pred),
            "precision": precision_score(y, y_pred),
            "roc_auc": roc_auc_score(y, y_proba),
        }
        logger.info("Scores {} -> {}", name, metrics)
        results.append(
            ModelResult(
                name=name,
                best_estimator=best_pipe,
                best_params=search.best_params_,
                best_threshold=threshold,
                metrics=metrics,
            )
        )
    return results
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def compute_dummy_baseline(y: pd.Series) -> dict:
    """Score a constant majority-class predictor as a reference baseline.

    A trained model should beat these numbers; roc_auc is fixed at 0.5
    because a constant predictor has no ranking power.
    """
    majority = int(y.mode().iloc[0])
    constant_pred = np.full_like(y, fill_value=majority)
    return {
        "strategy": "most_frequent",
        "majority_class": majority,
        "f1": f1_score(y, constant_pred),
        "recall": recall_score(y, constant_pred),
        "precision": precision_score(y, constant_pred, zero_division=0),
        "roc_auc": 0.5,
    }
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def fit_final_pipeline(
    best_result: ModelResult,
    X: pd.DataFrame,
    y: pd.Series,
    settings: Settings,
):
    """Refit the winning pipeline on the full dataset for deployment.

    The previous implementation ran SMOTE on the RAW feature frame before
    any preprocessing; SMOTE only accepts numeric input, so this breaks as
    soon as categorical columns are present, and it diverged from the
    cross-validation setup where resampling happened after the "prep"
    step. Cloning and refitting the full prep -> SMOTE -> clf imblearn
    pipeline keeps final training consistent with model selection, and the
    imblearn pipeline skips the sampler at predict time, so the saved
    artifact exposes the same predict/predict_proba interface (its
    named_steps additionally contain "smote").

    `settings` is kept in the signature for interface compatibility even
    though the clone already carries the selected random_state.
    """
    final_pipe = clone(best_result.best_estimator)
    final_pipe.fit(X, y)
    logger.success(
        "Modèle {} ré-entraîné sur {} lignes.", best_result.name, len(X)
    )
    return final_pipe
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def save_artifacts(
    pipeline: Pipeline,
    results: list[ModelResult],
    best_result: ModelResult,
    baseline: dict,
    settings: Settings,
    model_path: Path,
    metadata_path: Path,
    shap_path: Path,
    X: pd.DataFrame,
    y: pd.Series,
):
    """Persist the final pipeline, a JSON metadata report and SHAP outputs.

    Writes:
    - `model_path`: the fitted pipeline, serialized with joblib;
    - `metadata_path`: best model name/threshold/params, per-model CV
      metrics, the dummy baseline and the configured feature lists — this
      is the file predict.py reads back at inference time;
    - `shap_path`: the global SHAP summary (plus local explanations), only
      when compute_shap_summary returns a non-None summary.
    """
    model_path.parent.mkdir(parents=True, exist_ok=True)
    dump(pipeline, model_path)
    logger.success("Pipeline sauvegardé dans {}", model_path)

    # NumPy scalars are converted via _clean_values so json.dumps succeeds.
    metadata = {
        "best_model": best_result.name,
        "best_threshold": float(best_result.best_threshold),
        "best_params": best_result.best_params,
        "metrics": _clean_values(best_result.metrics),
        "all_results": [
            {
                "model": r.name,
                "metrics": _clean_values(r.metrics),
                "best_threshold": float(r.best_threshold),
                "best_params": r.best_params,
            }
            for r in results
        ],
        "baseline": _clean_values(baseline),
        "features": {
            "numerical": list(settings.num_cols),
            "categorical": list(settings.cat_cols),
        },
        "target": settings.target,
    }
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    metadata_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    logger.info("Métadonnées sauvegardées dans {}", metadata_path)

    # None means "nothing to export" — skip SHAP artifacts in that case.
    shap_summary, shap_values = compute_shap_summary(pipeline, X, y)
    if shap_summary is not None:
        save_shap_summary(shap_summary, shap_path)
        export_local_explanations(pipeline, shap_values, X)
|
| 287 |
|
| 288 |
|
| 289 |
@app.command()
def main(
    settings_path: Path = typer.Option(None, "--settings", "-s", help="Chemin alternatif vers settings.yml."),
    input_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "dataset.csv",
        "--input",
        "-i",
        help="Dataset enrichi issu de projet_05/features.py",
    ),
    model_path: Path = typer.Option(
        MODELS_DIR / "best_model.joblib",
        "--model-path",
        help="Chemin de sauvegarde du pipeline entraîné.",
    ),
    metadata_path: Path = typer.Option(
        MODELS_DIR / "best_model_meta.json",
        "--metadata-path",
        help="Chemin de sauvegarde des métriques et métadonnées.",
    ),
    shap_path: Path = typer.Option(
        REPORTS_DIR / "shap_summary.csv",
        "--shap-path",
        help="Chemin de sortie du résumé SHAP.",
    ),
):
    """Script principal pour lancer l'entraînement complet."""

    settings = load_settings(settings_path) if settings_path else load_settings()
    df = load_processed_dataset(input_path)
    X, y = split_features_target(df, settings)

    # Model selection by cross-validated F1.
    preprocessor = build_preprocessor(settings, X)
    results = evaluate_models(X, y, settings, preprocessor)
    if not results:
        raise RuntimeError("Aucun modèle évalué. Vérifiez la configuration.")
    best_result = max(results, key=lambda result: result.metrics["f1"])

    baseline = compute_dummy_baseline(y)
    logger.info("Baseline Dummy -> {}", baseline)

    final_pipeline = fit_final_pipeline(best_result, X, y, settings)
    save_artifacts(
        pipeline=final_pipeline,
        results=results,
        best_result=best_result,
        baseline=baseline,
        settings=settings,
        model_path=model_path,
        metadata_path=metadata_path,
        shap_path=shap_path,
        X=X,
        y=y,
    )
|
| 340 |
|
| 341 |
|
| 342 |
if __name__ == "__main__":
|
hf_space/hf_space/hf_space/projet_05/settings.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from functools import lru_cache
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Iterable
|
| 8 |
+
|
| 9 |
+
import yaml
|
| 10 |
+
|
| 11 |
+
DEFAULT_SETTINGS_PATH = Path(__file__).with_name("settings.yml")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass(frozen=True)
class Settings:
    """Immutable project configuration loaded from settings.yml."""

    # Seed shared by every stochastic component of the pipeline.
    random_state: int = 42
    # Raw input files.
    path_sirh: Path = field(default_factory=lambda: Path("data/raw/sirh.csv"))
    path_eval: Path = field(default_factory=lambda: Path("data/raw/evaluation.csv"))
    path_sondage: Path = field(default_factory=lambda: Path("data/raw/sondage.csv"))
    # Identifier and label column names.
    col_id: str = "id_employee"
    target: str = "a_quitte_l_entreprise"
    # Feature lists; tuples keep the frozen instances immutable end-to-end.
    num_cols: tuple[str, ...] = ()
    cat_cols: tuple[str, ...] = ()
    sat_cols: tuple[str, ...] = ()
    first_vars: tuple[str, ...] = ()
    subsample_frac: float = 1.0
    sql_file: Path = field(default_factory=lambda: Path("merge_sql.sql"))
    db_file: Path = field(default_factory=lambda: Path("merge_temp.db"))

    def as_dict(self) -> dict:
        """Return a serializable representation (useful for logging/tests)."""

        def _plain(value):
            # Paths become strings, tuples become lists; everything else as-is.
            if isinstance(value, Path):
                return str(value)
            if isinstance(value, tuple):
                return list(value)
            return value

        names = (
            "random_state",
            "path_sirh",
            "path_eval",
            "path_sondage",
            "col_id",
            "target",
            "num_cols",
            "cat_cols",
            "sat_cols",
            "first_vars",
            "subsample_frac",
            "sql_file",
            "db_file",
        )
        return {name: _plain(getattr(self, name)) for name in names}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _ensure_iterable(values: Iterable[str] | None, *, field_name: str) -> tuple[str, ...]:
|
| 50 |
+
if values is None:
|
| 51 |
+
return ()
|
| 52 |
+
if isinstance(values, str):
|
| 53 |
+
msg = f"'{field_name}' doit être une liste et non une chaîne isolée."
|
| 54 |
+
raise TypeError(msg)
|
| 55 |
+
return tuple(v for v in values if v)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _resolve_path(candidate: str | os.PathLike[str] | None, *, base_dir: Path) -> Path:
|
| 59 |
+
if not candidate:
|
| 60 |
+
raise ValueError("Aucun chemin n'a été fourni dans le fichier de configuration.")
|
| 61 |
+
resolved = Path(candidate)
|
| 62 |
+
if not resolved.is_absolute():
|
| 63 |
+
resolved = (base_dir / resolved).resolve()
|
| 64 |
+
return resolved
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _load_raw_settings(path: Path) -> dict:
    """Parse the YAML settings file and guarantee a mapping is returned.

    An empty file yields {}; any non-mapping top-level value is rejected.
    """
    with path.open("r", encoding="utf-8") as stream:
        payload = yaml.safe_load(stream) or {}
    if isinstance(payload, dict):
        return payload
    raise ValueError(f"Le fichier de configuration {path} doit contenir un dictionnaire YAML.")
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
@lru_cache
def load_settings(custom_path: str | os.PathLike[str] | None = None) -> Settings:
    """
    Charger la configuration projet depuis un fichier YAML.

    L'ordre de recherche est :
    1. Argument `custom_path` si fourni.
    2. Variable d'environnement `PROJET05_SETTINGS`.
    3. Fichier par défaut `projet_05/settings.yml`.

    NOTE: results are memoised per `custom_path`; changing the
    PROJET05_SETTINGS environment variable after the first call will NOT
    be picked up — call `load_settings.cache_clear()` (e.g. in tests).
    """
    env_path = os.environ.get("PROJET05_SETTINGS")
    raw_path = Path(custom_path or env_path or DEFAULT_SETTINGS_PATH)

    if not raw_path.exists():
        raise FileNotFoundError(
            f"Fichier de configuration introuvable : {raw_path}. "
            "Initialisez-le depuis projet_05/settings.yml ou indiquez PROJET05_SETTINGS."
        )

    base_dir = raw_path.parent
    payload = _load_raw_settings(raw_path)
    paths_block = payload.get("paths", {})
    # Build the fallback defaults once instead of instantiating Settings()
    # for every field lookup as the previous version did.
    defaults = Settings()

    return Settings(
        random_state=int(payload.get("random_state", defaults.random_state)),
        path_sirh=_resolve_path(paths_block.get("sirh", defaults.path_sirh), base_dir=base_dir),
        path_eval=_resolve_path(paths_block.get("evaluation", defaults.path_eval), base_dir=base_dir),
        path_sondage=_resolve_path(paths_block.get("sondage", defaults.path_sondage), base_dir=base_dir),
        col_id=payload.get("col_id", defaults.col_id),
        target=payload.get("target", defaults.target),
        num_cols=_ensure_iterable(payload.get("num_cols"), field_name="num_cols"),
        cat_cols=_ensure_iterable(payload.get("cat_cols"), field_name="cat_cols"),
        sat_cols=_ensure_iterable(payload.get("sat_cols"), field_name="sat_cols"),
        first_vars=_ensure_iterable(payload.get("first_vars"), field_name="first_vars"),
        subsample_frac=float(payload.get("subsample_frac", defaults.subsample_frac)),
        sql_file=_resolve_path(paths_block.get("sql_file", defaults.sql_file), base_dir=base_dir),
        db_file=_resolve_path(paths_block.get("db_file", defaults.db_file), base_dir=base_dir),
    )
|
hf_space/hf_space/hf_space/projet_05/settings.yml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
random_state: 42
|
| 2 |
+
col_id: id_employee
|
| 3 |
+
target: a_quitte_l_entreprise
|
| 4 |
+
subsample_frac: 0.5
|
| 5 |
+
|
| 6 |
+
paths:
|
| 7 |
+
sirh: ../data/raw/sirh.csv
|
| 8 |
+
evaluation: ../data/raw/evaluation.csv
|
| 9 |
+
sondage: ../data/raw/sondage.csv
|
| 10 |
+
sql_file: ../reports/merge_sql.sql
|
| 11 |
+
db_file: ../data/interim/merge_temp.db
|
| 12 |
+
|
| 13 |
+
num_cols:
|
| 14 |
+
- age
|
| 15 |
+
- revenu_mensuel
|
| 16 |
+
- annees_dans_l_entreprise
|
| 17 |
+
- annees_dans_le_poste_actuel
|
| 18 |
+
- annees_depuis_la_derniere_promotion
|
| 19 |
+
- distance_domicile_travail
|
| 20 |
+
- nombre_participation_pee
|
| 21 |
+
- note_evaluation_actuelle
|
| 22 |
+
- note_evaluation_precedente
|
| 23 |
+
- annees_depuis_le_changement_deposte
|
| 24 |
+
- annee_experience_totale
|
| 25 |
+
- nb_formations_suivies
|
| 26 |
+
- satisfaction_employee_environnement
|
| 27 |
+
- satisfaction_employee_nature_travail
|
| 28 |
+
- satisfaction_employee_equipe
|
| 29 |
+
- satisfaction_employee_equilibre_pro_perso
|
| 30 |
+
- augmentation_par_revenu
|
| 31 |
+
- annee_sur_poste_par_experience
|
| 32 |
+
- nb_formation_par_experience
|
| 33 |
+
- score_moyen_satisfaction
|
| 34 |
+
- dern_promo_par_experience
|
| 35 |
+
- evolution_note
|
| 36 |
+
|
| 37 |
+
cat_cols:
|
| 38 |
+
- genre
|
| 39 |
+
- departement
|
| 40 |
+
- frequence_deplacement
|
| 41 |
+
- etat_civil
|
| 42 |
+
- niveau_etudes
|
| 43 |
+
- role
|
| 44 |
+
- type_contrat
|
| 45 |
+
|
| 46 |
+
sat_cols:
|
| 47 |
+
- satisfaction_employee_environnement
|
| 48 |
+
- satisfaction_employee_nature_travail
|
| 49 |
+
- satisfaction_employee_equipe
|
| 50 |
+
- satisfaction_employee_equilibre_pro_perso
|
| 51 |
+
|
| 52 |
+
first_vars:
|
| 53 |
+
- age
|
| 54 |
+
- revenu_mensuel
|
| 55 |
+
- annees_dans_l_entreprise
|
| 56 |
+
- note_evaluation_actuelle
|
hf_space/hf_space/hf_space/scripts_projet04/brand/__init__.py
ADDED
|
File without changes
|
hf_space/hf_space/hf_space/scripts_projet04/brand/brand.py
ADDED
|
@@ -0,0 +1,713 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Palette et thèmes graphiques pour Matplotlib/Seaborn.
|
| 2 |
+
|
| 3 |
+
Ce module fournit une classe utilitaire (`Theme`) et une configuration
|
| 4 |
+
externe (`ThemeConfig`, `configure_theme`) permettant de définir des
|
| 5 |
+
couleurs, des palettes qualitatives et des cartes de couleurs (colormaps)
|
| 6 |
+
cohérentes. Des fonctions de démonstration et des wrappers
|
| 7 |
+
rétrocompatibles sont également fournis.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Any, List, Literal, Mapping, Optional, Tuple, Union
|
| 12 |
+
from dataclasses import dataclass, field, fields
|
| 13 |
+
|
| 14 |
+
import seaborn as sns
|
| 15 |
+
import numpy as np # Données factices pour les démos
|
| 16 |
+
import matplotlib.pyplot as plt
|
| 17 |
+
import matplotlib.colors as mcolors
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
#
|
| 21 |
+
# Dataclass de configuration et gestion externe du thème
|
| 22 |
+
#
|
| 23 |
+
@dataclass
class ThemeConfig:
    """External theme configuration.

    This structure groups the main colors, the palette variants and
    appearance options (background, rcParams). It can be passed to
    :func:`configure_theme`.

    Attributes
    ----------
    primary, secondary, tertiary : str
        Main colors in hexadecimal form (e.g. "#RRGGBB").
    background : str
        Background color for figures and axes.
    primary_variants, secondary_variants, tertiary_variants : list[str]
        Qualitative variants for multi-series plots.
    sequential_light, sequential_dark : dict | None
        Explicit replacements for the light/dark stops of the
        sequential colormaps.
    light_amount, dark_amount : float
        Coefficients used to lighten/darken when no explicit
        replacement is provided.
    text_color, axes_labelcolor, tick_color : str
        Colors for text, axis labels and tick marks.
    figure_dpi, savefig_dpi : int
        Display and export resolutions.
    """
    # Main colors
    primary: str = "#7451EB"
    secondary: str = "#EE8273"
    tertiary: str = "#A6BD63"
    # Background color
    background: str = "#FFFCF2"
    # Qualitative variants
    primary_variants: List[str] = field(default_factory=lambda: ["#9D7EF0", "#4B25D6"])
    secondary_variants: List[str] = field(default_factory=lambda: ["#F3A093", "#D95848"])
    tertiary_variants: List[str] = field(default_factory=lambda: ["#BDD681", "#7E923F"])
    # Sequential stop replacements
    sequential_light: Optional[dict] = field(default_factory=lambda: {
        "primary": "#f3f0fd",
        "secondary": "#fdecea",
        "tertiary": "#f6faec",
    })
    sequential_dark: Optional[dict] = field(default_factory=lambda: {
        "primary": "#2f1577",
        "secondary": "#8b3025",
        "tertiary": "#4b5c27",
    })
    # Default blend coefficients
    light_amount: float = 0.85
    dark_amount: float = 0.65
    # RC params (matplotlib)
    text_color: str = "black"
    axes_labelcolor: str = "black"
    tick_color: str = "black"
    figure_dpi: int = 110
    savefig_dpi: int = 300
|
| 80 |
+
|
| 81 |
+
# Matplotlib parameters applied by `Theme.apply()`.
# NOTE: these literals duplicate the ThemeConfig field defaults; the dict
# is mutated in place by `configure_theme()` each time a configuration is
# applied, so the values below only matter until the first call.
THEME_RC_OVERRIDES = {
    "text.color": "black",
    "axes.labelcolor": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "figure.dpi": 110,
    "savefig.dpi": 300,
    "savefig.bbox": "tight",
    "svg.fonttype": "none",
    "figure.facecolor": "#FFFCF2",
    "axes.facecolor": "#FFFCF2",
}
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def configure_theme(cfg: ThemeConfig) -> None:
    """Apply an **external** configuration to the theme.

    Pushes the colors and palettes from *cfg* into :class:`Theme` and
    refreshes the matplotlib ``rcParams`` overrides (background, text
    colors, resolutions).

    Parameters
    ----------
    cfg : ThemeConfig
        Instance holding every theme option.
    """
    # Install the palette on the class itself.
    Theme.configure(
        primary=cfg.primary,
        secondary=cfg.secondary,
        tertiary=cfg.tertiary,
        primary_variants=cfg.primary_variants,
        secondary_variants=cfg.secondary_variants,
        tertiary_variants=cfg.tertiary_variants,
        sequential_light=cfg.sequential_light,
        sequential_dark=cfg.sequential_dark,
        light_amount=cfg.light_amount,
        dark_amount=cfg.dark_amount,
    )
    Theme.BACKGROUND = cfg.background
    # Refresh the rcParams overrides in place so Theme.apply() sees them.
    rc_updates = {
        "text.color": cfg.text_color,
        "axes.labelcolor": cfg.axes_labelcolor,
        "xtick.color": cfg.tick_color,
        "ytick.color": cfg.tick_color,
        "figure.dpi": cfg.figure_dpi,
        "savefig.dpi": cfg.savefig_dpi,
        # Color-independent defaults, deliberately kept as-is.
        "savefig.bbox": "tight",
        "svg.fonttype": "none",
        "figure.facecolor": cfg.background,
        "axes.facecolor": cfg.background,
    }
    THEME_RC_OVERRIDES.update(rc_updates)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _config_from_mapping(data: Mapping[str, Any]) -> ThemeConfig:
    """Convert an arbitrary mapping into a :class:`ThemeConfig`."""
    valid_names = {spec.name for spec in fields(ThemeConfig)}
    extras = {key for key in data if key not in valid_names}
    if extras:
        raise ValueError(
            "Clés inconnues dans la configuration du thème: "
            + ", ".join(sorted(extras))
        )
    return ThemeConfig(**{key: data[key] for key in valid_names if key in data})
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def load_brand(path: Union[str, Path]) -> ThemeConfig:
    """Load a YAML file and return a theme configuration.

    The file must contain keys matching the attributes of
    :class:`ThemeConfig`. Missing values keep the dataclass defaults.
    """

    # PyYAML is an optional dependency; fail with a clear message if absent.
    try:
        import yaml
    except ImportError as exc:  # pragma: no cover - optional dependency
        raise RuntimeError(
            "PyYAML est requis pour charger une charte graphique YAML. "
            "Installez le paquet 'pyyaml'."
        ) from exc

    yaml_path = Path(path).expanduser()
    if not yaml_path.exists():
        raise FileNotFoundError(f"Fichier YAML introuvable: {yaml_path}")

    with yaml_path.open("r", encoding="utf-8") as stream:
        # An empty file parses to None; treat it as an empty mapping.
        parsed = yaml.safe_load(stream) or {}

    if not isinstance(parsed, Mapping):
        raise ValueError(
            "Le contenu du YAML doit être un mapping clé/valeur (dict)."
        )

    return _config_from_mapping(parsed)
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def configure_brand(path: Union[str, Path]) -> ThemeConfig:
    """Load a YAML file, apply the resulting configuration, return it."""

    loaded = load_brand(path)
    configure_theme(loaded)
    return loaded
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
class Theme:
    """Graphics theme for Matplotlib and Seaborn.

    The class provides:
    - main colors and an extended qualitative palette;
    - colormaps (sequential and diverging);
    - an :meth:`apply` method to apply the theme globally;
    - demo methods for a quick preview.

    Colors are not frozen: use :func:`configure_theme` to inject an
    external configuration.
    """

    # --- Main colors (configurable via Theme.configure) ---
    # Default values (overwritten by configure())
    PRIMARY: str = "#7451EB"  # violet (warm)
    SECONDARY: str = "#EE8273"  # coral (warm)
    TERTIARY: str = "#A6BD63"  # green (cool)
    BACKGROUND: str = "#FFFCF2"

    PALETTE: List[str] = ["#7451EB", "#EE8273", "#A6BD63"]

    @classmethod
    def base_palette(cls) -> List[str]:
        """Return the fundamental palette (PRIMARY, SECONDARY, TERTIARY)."""
        return [cls.PRIMARY, cls.SECONDARY, cls.TERTIARY]

    # Variants (qualitative palette) – configurable
    _PRIMARY_VARIANTS: List[str] = ["#9D7EF0", "#4B25D6"]
    _SECONDARY_VARIANTS: List[str] = ["#F3A093", "#D95848"]
    _TERTIARY_VARIANTS: List[str] = ["#BDD681", "#7E923F"]

    # Sequential colormaps (light -> color -> dark) – configurable.
    # NOTE: evaluated at class-creation time with the default PRIMARY/
    # SECONDARY/TERTIARY above; configure() recomputes this mapping.
    _SEQUENTIALS = {
        "primary": ["#f3f0fd", PRIMARY, "#2f1577"],
        "secondary": ["#fdecea", SECONDARY, "#8b3025"],
        "tertiary": ["#f6faec", TERTIARY, "#4b5c27"],
    }

    _NAMES = {"primary", "secondary", "tertiary"}

    # --------- Dynamic configuration ---------
    @staticmethod
    def _to_rgb(color: str):
        # Convert any matplotlib color spec to an RGB ndarray in [0, 1].
        return np.array(mcolors.to_rgb(color))

    @classmethod
    def _tint(cls, color: str, amount: float = 0.85) -> str:
        """Return a lightened version of ``color``.

        The blend with white is controlled by ``amount`` (0..1).
        """
        c = cls._to_rgb(color)
        white = np.array([1.0, 1.0, 1.0])
        mixed = (1 - amount) * c + amount * white
        return mcolors.to_hex(mixed)  # type: ignore

    @classmethod
    def _shade(cls, color: str, amount: float = 0.65) -> str:
        """Return a darkened version of ``color``.

        The blend with black is controlled by ``amount`` (0..1).
        """
        c = cls._to_rgb(color)
        black = np.array([0.0, 0.0, 0.0])
        mixed = (1 - amount) * c + amount * black
        return mcolors.to_hex(mixed)  # type: ignore

    @classmethod
    def _compute_sequentials(
        cls,
        primary: str,
        secondary: str,
        tertiary: str,
        light_overrides: Optional[dict] = None,
        dark_overrides: Optional[dict] = None,
        light_amount: float = 0.85,
        dark_amount: float = 0.65,
    ) -> dict:
        """Build the [light, mid, dark] stops for each color.

        Parameters
        ----------
        primary, secondary, tertiary : str
            Main colors in hex.
        light_overrides, dark_overrides : dict | None
            Explicit replacements for the light/dark tints.
        light_amount, dark_amount : float
            Blend coefficients used when no replacement is provided.

        Returns
        -------
        dict
            Mapping {name: [light, mid, dark]}.
        """
        light_overrides = light_overrides or {}
        dark_overrides = dark_overrides or {}
        base = {
            "primary": primary,
            "secondary": secondary,
            "tertiary": tertiary,
        }
        seq = {}
        for k, mid in base.items():
            # Fall back to computed tints when no explicit override exists.
            light = light_overrides.get(k) or cls._tint(mid, amount=light_amount)
            dark = dark_overrides.get(k) or cls._shade(mid, amount=dark_amount)
            seq[k] = [light, mid, dark]
        return seq

    @classmethod
    def configure(
        cls,
        *,
        primary: Optional[str] = None,
        secondary: Optional[str] = None,
        tertiary: Optional[str] = None,
        primary_variants: Optional[List[str]] = None,
        secondary_variants: Optional[List[str]] = None,
        tertiary_variants: Optional[List[str]] = None,
        sequential_light: Optional[dict] = None,
        sequential_dark: Optional[dict] = None,
        light_amount: float = 0.85,
        dark_amount: float = 0.65,
    ) -> None:
        """Dynamically update the class colors and colormaps.

        Examples
        --------
        >>> Theme.configure(primary="#0072CE", secondary="#FF6A00")
        >>> Theme.configure(
        ...     primary="#1f77b4",
        ...     sequential_light={"primary": "#eef5fb"},
        ...     sequential_dark={"primary": "#0b3050"},
        ... )

        Parameters
        ----------
        primary, secondary, tertiary : str | None
            Main colors.
        primary_variants, secondary_variants, tertiary_variants : list[str] | None
            Qualitative variants.
        sequential_light, sequential_dark : dict | None
            Replacements for the light/dark tints.
        light_amount, dark_amount : float
            Default blend coefficients.
        """
        if primary:
            cls.PRIMARY = primary
        if secondary:
            cls.SECONDARY = secondary
        if tertiary:
            cls.TERTIARY = tertiary

        if primary_variants is not None:
            cls._PRIMARY_VARIANTS = primary_variants
        if secondary_variants is not None:
            cls._SECONDARY_VARIANTS = secondary_variants
        if tertiary_variants is not None:
            cls._TERTIARY_VARIANTS = tertiary_variants

        # Recompute the sequential ramps (with any explicit overrides)
        cls._SEQUENTIALS = cls._compute_sequentials(
            cls.PRIMARY,
            cls.SECONDARY,
            cls.TERTIARY,
            light_overrides=sequential_light,
            dark_overrides=sequential_dark,
            light_amount=light_amount,
            dark_amount=dark_amount,
        )
        cls.PALETTE = cls.base_palette()

    # --------- internal helpers ---------
    @classmethod
    def _get_seq(cls, key: str) -> List[str]:
        """Return the sequential ramp associated with ``key``.

        Raises ``ValueError`` for an unknown key.
        """
        key = key.lower()
        if key not in cls._NAMES:
            raise ValueError(f"Couleur inconnue: {key}. Choisir parmi {sorted(cls._NAMES)}.")
        return cls._SEQUENTIALS[key]

    @staticmethod
    def _from_list(name: str, colors: List[str]) -> mcolors.LinearSegmentedColormap:
        """Create a ``LinearSegmentedColormap`` from a list of colors.
        """
        return mcolors.LinearSegmentedColormap.from_list(name, colors)

    @classmethod
    def _make_diverging(
        cls,
        start_key: str,
        end_key: str,
        *,
        center: Optional[str] = None,
        strong_ends: bool = True,
        blend_center: bool = False,
        blend_ratio: float = 0.5,
    ) -> Tuple[str, List[str]]:
        """Build a diverging colormap from two sequential ramps.

        Generated stops:
        ``[dark_start?, start_mid, center, end_mid, dark_end?]``
        """
        s_seq = cls._get_seq(start_key)  # [light, mid, dark]
        e_seq = cls._get_seq(end_key)  # [light, mid, dark]

        if blend_center:
            # mix_colors is a module-level helper defined later in the
            # file; it is resolved at call time, not at class creation.
            center_color = mix_colors(s_seq[1], e_seq[1], ratio=blend_ratio)
        else:
            center_color = center or "#f7f7f7"

        colors: List[str] = []
        if strong_ends:
            colors.append(s_seq[2])  # dark start
        colors.append(s_seq[1])  # start mid
        colors.append(center_color)  # neutral or blended center
        colors.append(e_seq[1])  # end mid
        if strong_ends:
            colors.append(e_seq[2])  # dark end

        name = f"ocr_div_{start_key}_{end_key}"
        return name, colors

    # --------- public API ---------
    @classmethod
    def colormap(
        cls,
        mode: Literal["primary", "secondary", "tertiary", "sequential", "diverging"] = "primary",
        *,
        start: Optional[Literal["primary", "secondary", "tertiary"]] = None,
        end: Optional[Literal["primary", "secondary", "tertiary"]] = None,
        reverse: bool = False,
        as_cmap: bool = True,
        center: Optional[str] = None,
        blend_center: bool = False,
        blend_ratio: float = 0.5,
        strong_ends: bool = True,
    ):
        """Return a Matplotlib colormap, or the list of color stops.

        Usage
        -----
        Sequential around one color:
            colormap("primary")
            colormap("sequential", start="primary")
        Diverging between two colors:
            colormap("diverging", start="primary", end="tertiary")

        Parameters
        ----------
        mode : {"primary", "secondary", "tertiary", "sequential", "diverging"}
            Kind of colormap requested.
        start, end : {"primary", "secondary", "tertiary"} | None
            Start/end colors (depending on the mode).
        reverse : bool
            Reverse the color order.
        as_cmap : bool
            If ``True``, return a ``Colormap`` object; otherwise the list
            of hexadecimal values.
        center : str | None
            Explicit center color (hex). Ignored when ``blend_center`` is ``True``.
        blend_center : bool
            Automatically blend the ``start`` and ``end`` hues to produce the center color.
        blend_ratio : float
            Blend ratio (0..1) applied when ``blend_center`` is enabled.
        strong_ends : bool
            Add the dark tints of the ramps at the ends of the diverging colormap.
        """
        # Compat alias: "primary"/"secondary"/"tertiary" => sequential
        if mode in {"primary", "secondary", "tertiary"}:
            seq = cls._get_seq(mode)
            colors = list(reversed(seq)) if reverse else seq
            return cls._from_list(f"ocr_{mode}", colors) if as_cmap else colors

        if mode == "sequential":
            key = (start or "primary").lower()
            seq = cls._get_seq(key)
            colors = list(reversed(seq)) if reverse else seq
            return cls._from_list(f"ocr_seq_{key}", colors) if as_cmap else colors

        if mode == "diverging":
            if not start or not end:
                raise ValueError("Pour un colormap diverging, fournir start=... et end=...")
            # NOTE(review): unlike "sequential", start/end are NOT
            # lower-cased here, so mixed-case keys are rejected — confirm
            # whether that asymmetry is intentional.
            if start not in cls._NAMES or end not in cls._NAMES:
                raise ValueError(f"start/end doivent être dans {sorted(cls._NAMES)}.")
            name, colors = cls._make_diverging(
                start,
                end,
                center=center,
                strong_ends=strong_ends,
                blend_center=blend_center,
                blend_ratio=blend_ratio,
            )
            if reverse:
                colors = list(reversed(colors))
            return cls._from_list(name, colors) if as_cmap else colors

        raise ValueError("mode inconnu. Utiliser 'primary'/'secondary'/'tertiary' ou 'sequential'/'diverging'.")

    @classmethod
    def apply(cls, *, context: str = "notebook", style: str = "white") -> List[str]:
        """Apply the global Seaborn/Matplotlib theme.

        Returns the extended qualitative palette handed to Seaborn.
        """
        pal = cls.extended_palette()
        sns.set_theme(context=context, style=style, palette=pal)  # type: ignore
        plt.rcParams.update(THEME_RC_OVERRIDES)
        return pal

    @classmethod
    def extended_palette(cls) -> List[str]:
        """Return the extended qualitative palette.

        Useful for multi-series plots (bars, lines, etc.).
        """
        return [
            cls.PRIMARY, *cls._PRIMARY_VARIANTS,
            cls.SECONDARY, *cls._SECONDARY_VARIANTS,
            cls.TERTIARY, *cls._TERTIARY_VARIANTS,
        ]

    # --------- Callable demos ---------
    @staticmethod
    def _demo_field(n: int = 300):
        """Generate a smooth 2D field suited for ``imshow``."""
        x = np.linspace(-3, 3, n)
        y = np.linspace(-3, 3, n)
        X, Y = np.meshgrid(x, y)
        Z = np.sin(X) * np.cos(Y)
        return X, Y, Z

    @staticmethod
    def _demo_matrix(shape: Tuple[int, int] = (10, 12), seed: int = 0):
        """Generate a random matrix for reproducible heatmaps."""
        rng = np.random.default_rng(seed)
        return rng.standard_normal(shape)

    @classmethod
    def demo_imshow_sequential(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        reverse: bool = False,
        with_colorbar: bool = True,
        title: Optional[str] = None,
        apply_theme: bool = False,
    ) -> None:
        """Show an ``imshow`` demo with a sequential colormap.

        Example
        -------
        >>> Theme.demo_imshow_sequential(start="tertiary", reverse=True)
        """
        if apply_theme:
            cls.apply()
        _, _, Z = cls._demo_field()
        cmap = cls.colormap("sequential", start=start, reverse=reverse)
        plt.imshow(Z, cmap=cmap, origin="lower")  # type: ignore
        direction = "foncé → clair" if reverse else "clair → foncé"
        plt.title(title or f"Séquentiel {start.upper()} ({direction})")
        if with_colorbar:
            plt.colorbar()
        plt.show()

    @classmethod
    def demo_imshow_diverging(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        end: Literal["primary", "secondary", "tertiary"] = "secondary",
        reverse: bool = False,
        with_colorbar: bool = True,
        title: Optional[str] = None,
        apply_theme: bool = False,
        center: Optional[str] = None,
        blend_center: bool = False,
        blend_ratio: float = 0.5,
        strong_ends: bool = True,
    ) -> None:
        """Show an ``imshow`` demo with a diverging colormap.

        Example
        -------
        >>> Theme.demo_imshow_diverging(start="primary", end="secondary")
        """
        if apply_theme:
            cls.apply()
        _, _, Z = cls._demo_field()
        cmap = cls.colormap(
            "diverging",
            start=start,
            end=end,
            reverse=reverse,
            center=center,
            blend_center=blend_center,
            blend_ratio=blend_ratio,
            strong_ends=strong_ends,
        )
        plt.imshow(Z, cmap=cmap, origin="lower")  # type: ignore
        plt.title(title or f"Diverging {start.upper()} ↔ {end.upper()}")
        if with_colorbar:
            plt.colorbar()
        plt.show()

    @classmethod
    def demo_heatmap_sequential(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        reverse: bool = False,
        title: Optional[str] = None,
        apply_theme: bool = True,
    ) -> None:
        """Show a Seaborn heatmap in sequential mode.

        Example
        -------
        >>> Theme.demo_heatmap_sequential(start="primary")
        """
        if apply_theme:
            cls.apply()
        data = cls._demo_matrix()
        plt.figure(figsize=(6, 4))
        sns.heatmap(data, cmap=cls.colormap("sequential", start=start, reverse=reverse))  # type: ignore
        direction = "foncé → clair" if reverse else "clair → foncé"
        plt.title(title or f"Heatmap séquentielle - {start.upper()} ({direction})")
        plt.show()

    @classmethod
    def demo_heatmap_diverging(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        end: Literal["primary", "secondary", "tertiary"] = "tertiary",
        reverse: bool = False,
        title: Optional[str] = None,
        apply_theme: bool = True,
    ) -> None:
        """Show a Seaborn heatmap in diverging mode.

        Example
        -------
        >>> Theme.demo_heatmap_diverging(start="primary", end="tertiary")
        """
        if apply_theme:
            cls.apply()
        data = cls._demo_matrix()
        plt.figure(figsize=(6, 4))
        sns.heatmap(data, cmap=cls.colormap("diverging", start=start, end=end, reverse=reverse))  # type: ignore
        plt.title(title or f"Heatmap diverging - {start.upper()} ↔ {end.upper()}")
        plt.show()
|
| 648 |
+
|
| 649 |
+
|
| 650 |
+
|
| 651 |
+
# Functional API (backward compatibility)
|
| 652 |
+
|
| 653 |
+
def set_theme():
    """Apply the OC theme and return the extended palette.

    Backward-compatible shortcut for :meth:`Theme.apply`.
    """
    palette = Theme.apply()
    return palette
|
| 659 |
+
|
| 660 |
+
|
| 661 |
+
def set_colormap(
    mode: Literal["primary", "secondary", "tertiary", "sequential", "diverging"] = "primary",
    *,
    start: Optional[Literal["primary", "secondary", "tertiary"]] = None,
    end: Optional[Literal["primary", "secondary", "tertiary"]] = None,
    reverse: bool = False,
    as_cmap: bool = True,
):
    """Shortcut to obtain an OC colormap.

    See :meth:`Theme.colormap` for parameter details.
    """
    return Theme.colormap(
        mode,
        start=start,
        end=end,
        reverse=reverse,
        as_cmap=as_cmap,
    )
|
| 674 |
+
|
| 675 |
+
# Default configuration (external to the class).
# Every value previously spelled out here duplicated the ThemeConfig field
# defaults verbatim; using a bare ThemeConfig() keeps the two definitions
# from drifting apart while producing an identical configuration.
_default_cfg = ThemeConfig()
configure_theme(_default_cfg)
|
| 697 |
+
def mix_colors(color1: str, color2: str, ratio: float = 0.5) -> str:
    """Blend two hexadecimal colors according to ``ratio`` (0-1).

    ``ratio=0`` returns ``color1``; ``ratio=1`` returns ``color2``.
    """
    r1, g1, b1 = mcolors.to_rgb(color1)
    r2, g2, b2 = mcolors.to_rgb(color2)
    blended = (
        (1 - ratio) * r1 + ratio * r2,
        (1 - ratio) * g1 + ratio * g2,
        (1 - ratio) * b1 + ratio * b2,
    )
    return mcolors.to_hex(blended)  # type: ignore
|
| 703 |
+
|
| 704 |
+
|
| 705 |
+
def make_diverging_cmap(
    primary: str,
    secondary: str,
    name: str = "custom_diverging",
    ratio: float = 0.5,
):
    """Create a simple diverging colormap (primary → mix → secondary)."""
    midpoint = mix_colors(primary, secondary, ratio=ratio)
    stops = [primary, midpoint, secondary]
    return mcolors.LinearSegmentedColormap.from_list(name, stops)
|