GitHub Actions committed on
Commit
53af998
·
1 Parent(s): 9a917c1

🚀 Auto-deploy from GitHub Actions

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .github/workflows/deploy.yml +2 -2
  2. hf_space/hf_space/hf_space/README.md +34 -18
  3. hf_space/hf_space/hf_space/hf_space/LICENSE +10 -0
  4. hf_space/hf_space/hf_space/hf_space/Makefile +85 -0
  5. hf_space/hf_space/hf_space/hf_space/app.py +178 -4
  6. hf_space/hf_space/hf_space/hf_space/docs/.gitkeep +0 -0
  7. hf_space/hf_space/hf_space/hf_space/docs/README.md +12 -0
  8. hf_space/hf_space/hf_space/hf_space/docs/docs/getting-started.md +6 -0
  9. hf_space/hf_space/hf_space/hf_space/docs/docs/index.md +10 -0
  10. hf_space/hf_space/hf_space/hf_space/docs/mkdocs.yml +4 -0
  11. hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -4
  12. hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +191 -1
  13. hf_space/hf_space/hf_space/hf_space/hf_space/README.md +328 -0
  14. hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +0 -0
  15. hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +7 -0
  16. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +37 -0
  17. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
  18. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +2 -0
  19. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
  20. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +7 -0
  21. hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py +17 -0
  22. hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep +0 -0
  23. hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  24. hf_space/hf_space/hf_space/hf_space/poetry.lock +0 -0
  25. hf_space/hf_space/hf_space/hf_space/poetry.toml +2 -0
  26. hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +1 -0
  27. hf_space/hf_space/hf_space/hf_space/projet_05/config.py +32 -0
  28. hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +29 -0
  29. hf_space/hf_space/hf_space/hf_space/projet_05/features.py +29 -0
  30. hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py +0 -0
  31. hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +30 -0
  32. hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +30 -0
  33. hf_space/hf_space/hf_space/hf_space/projet_05/plots.py +29 -0
  34. hf_space/hf_space/hf_space/hf_space/pyproject.toml +53 -0
  35. hf_space/hf_space/hf_space/hf_space/references/.gitkeep +0 -0
  36. hf_space/hf_space/hf_space/hf_space/reports/.gitkeep +0 -0
  37. hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep +0 -0
  38. hf_space/hf_space/hf_space/hf_space/tests/test_data.py +5 -0
  39. hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  40. hf_space/hf_space/hf_space/projet_05/__init__.py +3 -0
  41. hf_space/hf_space/hf_space/projet_05/branding.py +52 -0
  42. hf_space/hf_space/hf_space/projet_05/dataset.py +188 -14
  43. hf_space/hf_space/hf_space/projet_05/explainability.py +102 -0
  44. hf_space/hf_space/hf_space/projet_05/features.py +156 -14
  45. hf_space/hf_space/hf_space/projet_05/modeling/predict.py +84 -14
  46. hf_space/hf_space/hf_space/projet_05/modeling/train.py +328 -15
  47. hf_space/hf_space/hf_space/projet_05/settings.py +114 -0
  48. hf_space/hf_space/hf_space/projet_05/settings.yml +56 -0
  49. hf_space/hf_space/hf_space/scripts_projet04/brand/__init__.py +0 -0
  50. hf_space/hf_space/hf_space/scripts_projet04/brand/brand.py +713 -0
.github/workflows/deploy.yml CHANGED
@@ -33,8 +33,8 @@ jobs:
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
- rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
+ rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,5 +1,17 @@
1
  # projet_05
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
  </a>
@@ -57,6 +69,11 @@ Déployez un modèle de Machine Learning
57
  └── plots.py <- Code to create visualizations
58
  ```
59
 
 
 
 
 
 
60
  --------
61
 
62
  ---
@@ -93,6 +110,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
93
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
  -->
95
  [![Contributors][contributors-shield]][contributors-url]
 
96
  [![Forks][forks-shield]][forks-url]
97
  [![Stargazers][stars-shield]][stars-url]
98
  [![Issues][issues-shield]][issues-url]
@@ -236,7 +254,7 @@ _For more examples, please refer to the [Documentation](https://example.com)_
236
  - [ ] Feature 3
237
  - [ ] Nested Feature
238
 
239
- See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
 
241
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
 
@@ -299,18 +317,18 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
299
 
300
  <!-- MARKDOWN LINKS & IMAGES -->
301
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
- [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
- [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
- [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
- [forks-url]: https://github.com/github_username/repo_name/network/members
306
- [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
- [stars-url]: https://github.com/github_username/repo_name/stargazers
308
- [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
- [issues-url]: https://github.com/github_username/repo_name/issues
310
- [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
- [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
- [linkedin-url]: https://linkedin.com/in/linkedin_username
314
  [product-screenshot]: images/screenshot.png
315
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
  <!-- Shields.io badges. You can find a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
@@ -331,10 +349,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
331
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
  [JQuery-url]: https://jquery.com
333
  <!-- TODO: -->
334
- [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
- [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
- [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
- [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
- [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
- [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)[text](../projet_04/.gitignore)
 
1
  # projet_05
2
 
3
+ ---
4
+ title: OCR_Projet05
5
+ emoji: 🔥
6
+ colorFrom: purple
7
+ colorTo: purple
8
+ sdk: gradio
9
+ sdk_version: 5.49.1
10
+ app_file: app.py
11
+ pinned: true
12
+ short_description: Projet 05 formation Openclassrooms
13
+ ---
14
+
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
 
69
  └── plots.py <- Code to create visualizations
70
  ```
71
 
72
+ ## Code hérité réutilisé
73
+
74
+ - `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
75
+ - `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
76
+
77
  --------
78
 
79
  ---
 
110
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
111
  -->
112
  [![Contributors][contributors-shield]][contributors-url]
113
+ [![Python][python]][python]
114
  [![Forks][forks-shield]][forks-url]
115
  [![Stargazers][stars-shield]][stars-url]
116
  [![Issues][issues-shield]][issues-url]
 
254
  - [ ] Feature 3
255
  - [ ] Nested Feature
256
 
257
+ See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
258
 
259
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
 
 
317
 
318
  <!-- MARKDOWN LINKS & IMAGES -->
319
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
320
+ [contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
321
+ [contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
322
+ [forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
323
+ [forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
324
+ [stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
325
+ [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
326
+ [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
327
+ [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
328
+ [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
329
+ [license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
330
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
331
+ [linkedin-url]: https://linkedin.com/in/stephanemanet
332
  [product-screenshot]: images/screenshot.png
333
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
334
  <!-- Shields.io badges. You can find a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
 
349
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
350
  [JQuery-url]: https://jquery.com
351
  <!-- TODO: -->
352
+ [Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
353
+ [Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
354
+ [MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
355
+ [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
 
356
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
 
hf_space/hf_space/hf_space/hf_space/LICENSE ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ The MIT License (MIT)
3
+ Copyright (c) 2025, Stéphane Manet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10
+
hf_space/hf_space/hf_space/hf_space/Makefile ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #################################################################################
2
+ # GLOBALS #
3
+ #################################################################################
4
+
5
+ PROJECT_NAME = OCR_projet05
6
+ PYTHON_VERSION = 3.10
7
+ PYTHON_INTERPRETER = python
8
+
9
+ #################################################################################
10
+ # COMMANDS #
11
+ #################################################################################
12
+
13
+
14
+ ## Install Python dependencies
15
+ .PHONY: requirements
16
+ requirements:
17
+ pip install -e .
18
+
19
+
20
+
21
+
22
+ ## Delete all compiled Python files
23
+ .PHONY: clean
24
+ clean:
25
+ find . -type f -name "*.py[co]" -delete
26
+ find . -type d -name "__pycache__" -delete
27
+
28
+
29
+ ## Lint using ruff (use `make format` to do formatting)
30
+ .PHONY: lint
31
+ lint:
32
+ ruff format --check
33
+ ruff check
34
+
35
+ ## Format source code with ruff
36
+ .PHONY: format
37
+ format:
38
+ ruff check --fix
39
+ ruff format
40
+
41
+
42
+
43
+ ## Run tests
44
+ .PHONY: test
45
+ test:
46
+ python -m pytest tests
47
+
48
+
49
+ ## Set up Python interpreter environment
50
+ .PHONY: create_environment
51
+ create_environment:
52
+ @bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
53
+ @echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
54
+
55
+
56
+
57
+
58
+ #################################################################################
59
+ # PROJECT RULES #
60
+ #################################################################################
61
+
62
+
63
+ ## Make dataset
64
+ .PHONY: data
65
+ data: requirements
66
+ $(PYTHON_INTERPRETER) projet_05/dataset.py
67
+
68
+
69
+ #################################################################################
70
+ # Self Documenting Commands #
71
+ #################################################################################
72
+
73
+ .DEFAULT_GOAL := help
74
+
75
+ define PRINT_HELP_PYSCRIPT
76
+ import re, sys; \
77
+ lines = '\n'.join([line for line in sys.stdin]); \
78
+ matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
79
+ print('Available rules:\n'); \
80
+ print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
81
+ endef
82
+ export PRINT_HELP_PYSCRIPT
83
+
84
+ help:
85
+ @$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -1,7 +1,181 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
  import gradio as gr
8
+ import pandas as pd
9
+ from loguru import logger
10
+
11
+ from projet_05.branding import apply_brand_theme
12
+ from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
13
+
14
+ MODEL_PATH = Path("models/best_model.joblib")
15
+ METADATA_PATH = Path("models/best_model_meta.json")
16
+ SCHEMA_PATH = Path("data/processed/schema.json")
17
+
18
+
19
+ def _load_schema(path: Path) -> dict[str, Any]:
20
+ if not path.exists():
21
+ return {}
22
+ return json.loads(path.read_text(encoding="utf-8"))
23
+
24
+
25
+ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
26
+ if schema:
27
+ candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
28
+ if candidates:
29
+ return candidates
30
+ features = metadata.get("features", {})
31
+ explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
32
+ if explicit:
33
+ return explicit
34
+ if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
35
+ return list(pipeline.feature_names_in_)
36
+ return []
37
+
38
+
39
+ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
40
+ if isinstance(payload, pd.DataFrame):
41
+ df = payload.copy()
42
+ elif payload is None:
43
+ df = pd.DataFrame(columns=headers)
44
+ else:
45
+ df = pd.DataFrame(payload, columns=headers if headers else None)
46
+ df = df.dropna(how="all")
47
+ if df.empty:
48
+ raise gr.Error("Merci de saisir au moins une ligne complète.")
49
+ return df
50
+
51
+
52
+ def _ensure_model():
53
+ if PIPELINE is None:
54
+ raise gr.Error(
55
+ "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
56
+ )
57
+
58
+
59
+ def score_table(table):
60
+ _ensure_model()
61
+ df = _convert_input(table, FEATURE_ORDER)
62
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
63
+ return run_inference(
64
+ df,
65
+ PIPELINE,
66
+ THRESHOLD,
67
+ drop_columns=drop_cols,
68
+ required_features=FEATURE_ORDER or None,
69
+ )
70
+
71
+
72
+ def score_csv(upload):
73
+ _ensure_model()
74
+ if upload is None:
75
+ raise gr.Error("Veuillez déposer un fichier CSV.")
76
+ df = pd.read_csv(upload.name)
77
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
78
+ return run_inference(
79
+ df,
80
+ PIPELINE,
81
+ THRESHOLD,
82
+ drop_columns=drop_cols,
83
+ required_features=FEATURE_ORDER or None,
84
+ )
85
+
86
+
87
+ def predict_from_form(*values):
88
+ _ensure_model()
89
+ if not FEATURE_ORDER:
90
+ raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
91
+ payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
92
+ df = pd.DataFrame([payload])
93
+ scored = run_inference(
94
+ df,
95
+ PIPELINE,
96
+ THRESHOLD,
97
+ required_features=FEATURE_ORDER or None,
98
+ )
99
+ row = scored.iloc[0]
100
+ label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
101
+ return {
102
+ "probability": round(float(row["proba_depart"]), 4),
103
+ "decision": label,
104
+ "threshold": THRESHOLD,
105
+ }
106
+
107
+
108
+ # Chargement des artéfacts
109
+ apply_brand_theme()
110
+
111
+ PIPELINE = None
112
+ METADATA: dict[str, Any] = {}
113
+ THRESHOLD = 0.5
114
+ TARGET_COLUMN: str | None = None
115
+ SCHEMA = _load_schema(SCHEMA_PATH)
116
+
117
+ try:
118
+ PIPELINE = load_pipeline(MODEL_PATH)
119
+ METADATA = load_metadata(METADATA_PATH)
120
+ THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
121
+ TARGET_COLUMN = METADATA.get("target")
122
+ except FileNotFoundError as exc:
123
+ logger.warning("Artéfact manquant: {}", exc)
124
+
125
+ FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
126
+
127
+ with gr.Blocks(title="Prédicteur d'attrition") as demo:
128
+ gr.Markdown("# API Gradio – Prédiction de départ employé")
129
+ gr.Markdown(
130
+ "Le modèle applique le pipeline entraîné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
131
+ )
132
+
133
+ if PIPELINE is None:
134
+ gr.Markdown(
135
+ "⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
136
+ )
137
+ else:
138
+ gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
139
+
140
+ with gr.Tab("Formulaire unitaire"):
141
+ if not FEATURE_ORDER:
142
+ gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
143
+ else:
144
+ form_inputs: list[gr.components.Component] = [] # type: ignore
145
+ for feature in FEATURE_ORDER:
146
+ form_inputs.append(
147
+ gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
148
+ )
149
+ form_output = gr.JSON(label="Résultat")
150
+ gr.Button("Prédire").click(
151
+ fn=predict_from_form,
152
+ inputs=form_inputs,
153
+ outputs=form_output,
154
+ )
155
+
156
+ with gr.Tab("Tableau interactif"):
157
+ table_input = gr.Dataframe(
158
+ headers=FEATURE_ORDER if FEATURE_ORDER else None,
159
+ row_count=(1, "dynamic"),
160
+ col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
161
+ type="pandas",
162
+ )
163
+ table_output = gr.Dataframe(label="Prédictions", type="pandas")
164
+ gr.Button("Scorer les lignes").click(
165
+ fn=score_table,
166
+ inputs=table_input,
167
+ outputs=table_output,
168
+ )
169
+
170
+ with gr.Tab("Fichier CSV"):
171
+ file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
172
+ file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
173
+ gr.Button("Scorer le fichier").click(
174
+ fn=score_csv,
175
+ inputs=file_input,
176
+ outputs=file_output,
177
+ )
178
 
 
 
179
 
180
+ if __name__ == "__main__":
181
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/docs/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/docs/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generating the docs
2
+ ----------
3
+
4
+ Use [mkdocs](http://www.mkdocs.org/) structure to update the documentation.
5
+
6
+ Build locally with:
7
+
8
+ mkdocs build
9
+
10
+ Serve locally with:
11
+
12
+ mkdocs serve
hf_space/hf_space/hf_space/hf_space/docs/docs/getting-started.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Getting started
2
+ ===============
3
+
4
+ This is where you describe how to get set up on a clean install, including the
5
+ commands necessary to get the raw data (using the `sync_data_from_s3` command,
6
+ for example), and then how to make the cleaned, final data sets.
hf_space/hf_space/hf_space/hf_space/docs/docs/index.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # projet_05 documentation!
2
+
3
+ ## Description
4
+
5
+ Déployez un modèle de Machine Learning
6
+
7
+ ## Commands
8
+
9
+ The Makefile contains the central entry points for common tasks related to this project.
10
+
hf_space/hf_space/hf_space/hf_space/docs/mkdocs.yml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ site_name: projet_05
2
+ #
3
+ site_author: Stéphane Manet
4
+ #
hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -1,10 +1,13 @@
1
- name: Déployer vers Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
 
 
 
8
  jobs:
9
  deploy:
10
  runs-on: ubuntu-latest
@@ -23,7 +26,7 @@ jobs:
23
  python -m pip install --upgrade pip
24
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
25
 
26
- - name: Push to Hugging Face Space
27
  env:
28
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
29
  run: |
@@ -33,5 +36,5 @@ jobs:
33
  rsync -av --exclude '.git' ./ hf_space/
34
  cd hf_space
35
  git add .
36
- git commit -m "🚀 Auto-deploy from GitHub Actions"
37
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
1
+ name: Deploy to Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
8
+ permissions:
9
+ contents: write
10
+
11
  jobs:
12
  deploy:
13
  runs-on: ubuntu-latest
 
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28
 
29
+ - name: Deploy to Hugging Face Space
30
  env:
31
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
32
  run: |
 
36
  rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
+ git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore CHANGED
@@ -1,2 +1,192 @@
 
 
 
 
 
1
  *.code-workspace
2
- .venv/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data
2
+ /data/
3
+
4
+ # Mac OS-specific storage files
5
+ .DS_Store
6
  *.code-workspace
7
+
8
+ # vim
9
+ *.swp
10
+ *.swo
11
+
12
+ ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
13
+
14
+ # Byte-compiled / optimized / DLL files
15
+ __pycache__/
16
+ *.py[cod]
17
+ *$py.class
18
+
19
+ # C extensions
20
+ *.so
21
+
22
+ # Distribution / packaging
23
+ .Python
24
+ build/
25
+ develop-eggs/
26
+ dist/
27
+ downloads/
28
+ eggs/
29
+ .eggs/
30
+ lib/
31
+ lib64/
32
+ parts/
33
+ sdist/
34
+ var/
35
+ wheels/
36
+ share/python-wheels/
37
+ *.egg-info/
38
+ .installed.cfg
39
+ *.egg
40
+ MANIFEST
41
+
42
+ # PyInstaller
43
+ # Usually these files are written by a python script from a template
44
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
45
+ *.manifest
46
+ *.spec
47
+
48
+ # Installer logs
49
+ pip-log.txt
50
+ pip-delete-this-directory.txt
51
+
52
+ # Unit test / coverage reports
53
+ htmlcov/
54
+ .tox/
55
+ .nox/
56
+ .coverage
57
+ .coverage.*
58
+ .cache
59
+ nosetests.xml
60
+ coverage.xml
61
+ *.cover
62
+ *.py,cover
63
+ .hypothesis/
64
+ .pytest_cache/
65
+ cover/
66
+
67
+ # Translations
68
+ *.mo
69
+ *.pot
70
+
71
+ # Django stuff:
72
+ *.log
73
+ local_settings.py
74
+ db.sqlite3
75
+ db.sqlite3-journal
76
+
77
+ # Flask stuff:
78
+ instance/
79
+ .webassets-cache
80
+
81
+ # Scrapy stuff:
82
+ .scrapy
83
+
84
+ # MkDocs documentation
85
+ docs/site/
86
+
87
+ # PyBuilder
88
+ .pybuilder/
89
+ target/
90
+
91
+ # Jupyter Notebook
92
+ .ipynb_checkpoints
93
+
94
+ # IPython
95
+ profile_default/
96
+ ipython_config.py
97
+
98
+ # pyenv
99
+ # For a library or package, you might want to ignore these files since the code is
100
+ # intended to run in multiple environments; otherwise, check them in:
101
+ # .python-version
102
+
103
+ # pipenv
104
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
105
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
106
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
107
+ # install all needed dependencies.
108
+ #Pipfile.lock
109
+
110
+ # UV
111
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
112
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
113
+ # commonly ignored for libraries.
114
+ #uv.lock
115
+
116
+ # poetry
117
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
118
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
119
+ # commonly ignored for libraries.
120
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
121
+ #poetry.lock
122
+
123
+ # pdm
124
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
125
+ #pdm.lock
126
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
127
+ # in version control.
128
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
129
+ .pdm.toml
130
+ .pdm-python
131
+ .pdm-build/
132
+
133
+ # pixi
134
+ # pixi.lock should be committed to version control for reproducibility
135
+ # .pixi/ contains the environments and should not be committed
136
+ .pixi/
137
+
138
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
139
+ __pypackages__/
140
+
141
+ # Celery stuff
142
+ celerybeat-schedule
143
+ celerybeat.pid
144
+
145
+ # SageMath parsed files
146
+ *.sage.py
147
+
148
+ # Environments
149
+ .env
150
+ .venv
151
+ env/
152
+ venv/
153
+ ENV/
154
+ env.bak/
155
+ venv.bak/
156
+
157
+ # Spyder project settings
158
+ .spyderproject
159
+ .spyproject
160
+
161
+ # Rope project settings
162
+ .ropeproject
163
+
164
+ # mkdocs documentation
165
+ /site
166
+
167
+ # mypy
168
+ .mypy_cache/
169
+ .dmypy.json
170
+ dmypy.json
171
+
172
+ # Pyre type checker
173
+ .pyre/
174
+
175
+ # pytype static type analyzer
176
+ .pytype/
177
+
178
+ # Cython debug symbols
179
+ cython_debug/
180
+
181
+ # PyCharm
182
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
183
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
184
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
185
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
186
+ #.idea/
187
+
188
+ # Ruff stuff:
189
+ .ruff_cache/
190
+
191
+ # PyPI configuration file
192
+ .pypirc
hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,3 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Projet 05
3
  emoji: 👀
@@ -10,3 +71,270 @@ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # projet_05
2
+
3
+ <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
+ <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
+ </a>
6
+
7
+ Déployez un modèle de Machine Learning
8
+
9
+ ## Organisation du projet
10
+
11
+ ```
12
+ ├── LICENSE <- Open-source license if one is chosen
13
+ ├── Makefile <- Makefile with convenience commands like `make data` or `make train`
14
+ ├── README.md <- The top-level README for developers using this project.
15
+ ├── data
16
+ │ ├── external <- Data from third party sources.
17
+ │ ├── interim <- Intermediate data that has been transformed.
18
+ │ ├── processed <- The final, canonical data sets for modeling.
19
+ │ └── raw <- The original, immutable data dump.
20
+ │
21
+ ├── docs <- A default mkdocs project; see www.mkdocs.org for details
22
+ │
23
+ ├── models <- Trained and serialized models, model predictions, or model summaries
24
+ │
25
+ ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
26
+ │ the creator's initials, and a short `-` delimited description, e.g.
27
+ │ `1.0-jqp-initial-data-exploration`.
28
+ │
29
+ ├── pyproject.toml <- Project configuration file with package metadata for
30
+ │ projet_05 and configuration for tools like black
31
+ │
32
+ ├── references <- Data dictionaries, manuals, and all other explanatory materials.
33
+ │
34
+ ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
35
+ │ └── figures <- Generated graphics and figures to be used in reporting
36
+ │
37
+ ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
38
+ │ generated with `pip freeze > requirements.txt`
39
+ │
40
+ ├── setup.cfg <- Configuration file for flake8
41
+ │
42
+ └── projet_05 <- Source code for use in this project.
43
+ │
44
+ ├── __init__.py <- Makes projet_05 a Python module
45
+ │
46
+ ├── config.py <- Store useful variables and configuration
47
+ │
48
+ ├── dataset.py <- Scripts to download or generate data
49
+ │
50
+ ├── features.py <- Code to create features for modeling
51
+ │
52
+ ├── modeling
53
+ │ ├── __init__.py
54
+ │ ├── predict.py <- Code to run model inference with trained models
55
+ │ └── train.py <- Code to train models
56
+ │
57
+ └── plots.py <- Code to create visualizations
58
+ ```
59
+
60
+ --------
61
+
62
  ---
63
  title: Projet 05
64
  emoji: đź‘€
 
71
  ---
72
 
73
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
74
+
75
+ <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
76
+ <a id="readme-top"></a>
77
+ <!--
78
+ *** Thanks for checking out the Best-README-Template. If you have a suggestion
79
+ *** that would make this better, please fork the repo and create a pull request
80
+ *** or simply open an issue with the tag "enhancement".
81
+ *** Don't forget to give the project a star!
82
+ *** Thanks again! Now go create something AMAZING! :D
83
+ -->
84
+
85
+
86
+
87
+ <!-- PROJECT SHIELDS -->
88
+ <!--
89
+ *** I'm using markdown "reference style" links for readability.
90
+ *** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
91
+ *** See the bottom of this document for the declaration of the reference variables
92
+ *** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
93
+ *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
+ -->
95
+ [![Contributors][contributors-shield]][contributors-url]
96
+ [![Forks][forks-shield]][forks-url]
97
+ [![Stargazers][stars-shield]][stars-url]
98
+ [![Issues][issues-shield]][issues-url]
99
+ [![project_license][license-shield]][license-url]
100
+ [![LinkedIn][linkedin-shield]][linkedin-url]
101
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
102
+
103
+
104
+
105
+ <!-- PROJECT LOGO -->
106
+ <br />
107
+ <div align="center">
108
+ <a href="https://github.com/github_username/repo_name">
109
+ <img src="images/logo.png" alt="Logo" width="80" height="80">
110
+ </a>
111
+
112
+ <h3 align="center">project_title</h3>
113
+
114
+ <p align="center">
115
+ project_description
116
+ <br />
117
+ <a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
118
+ <br />
119
+ <br />
120
+ <a href="https://github.com/github_username/repo_name">View Demo</a>
121
+ &middot;
122
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
123
+ &middot;
124
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
125
+ </p>
126
+ </div>
127
+
128
+
129
+
130
+ <!-- TABLE OF CONTENTS -->
131
+ <details>
132
+ <summary>Table of Contents</summary>
133
+ <ol>
134
+ <li>
135
+ <a href="#about-the-project">About The Project</a>
136
+ <ul>
137
+ <li><a href="#built-with">Built With</a></li>
138
+ </ul>
139
+ </li>
140
+ <li>
141
+ <a href="#getting-started">Getting Started</a>
142
+ <ul>
143
+ <li><a href="#prerequisites">Prerequisites</a></li>
144
+ <li><a href="#installation">Installation</a></li>
145
+ </ul>
146
+ </li>
147
+ <li><a href="#usage">Usage</a></li>
148
+ <li><a href="#roadmap">Roadmap</a></li>
149
+ <li><a href="#contributing">Contributing</a></li>
150
+ <li><a href="#license">License</a></li>
151
+ <li><a href="#contact">Contact</a></li>
152
+ <li><a href="#acknowledgments">Acknowledgments</a></li>
153
+ </ol>
154
+ </details>
155
+
156
+
157
+
158
+ <!-- ABOUT THE PROJECT -->
159
+ ## About The Project
160
+
161
+ [![Product Name Screen Shot][product-screenshot]](https://example.com)
162
+
163
+ Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
164
+
165
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
166
+
167
+
168
+
169
+ ### Built With
170
+
171
+ * [![Python][Python]][Python-url]
172
+ * [![SQL][SQL]][SQL-url]
173
+
174
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
175
+
176
+
177
+
178
+ <!-- GETTING STARTED -->
179
+ ## Getting Started
180
+
181
+ This is an example of how you may give instructions on setting up your project locally.
182
+ To get a local copy up and running follow these simple example steps.
183
+
184
+ ### Prerequisites
185
+
186
+ This is an example of how to list things you need to use the software and how to install them.
187
+ * npm
188
+ ```sh
189
+ npm install npm@latest -g
190
+ ```
191
+
192
+ ### Installation
193
+
194
+ pip install -r requirements.txt
195
+ uvicorn app.main:app --reload
196
+
197
+ 1. Get a free API Key at [https://example.com](https://example.com)
198
+ 2. Clone the repo
199
+ ```sh
200
+ git clone https://github.com/github_username/repo_name.git
201
+ ```
202
+ 3. Install NPM packages
203
+ ```sh
204
+ npm install
205
+ ```
206
+ 4. Enter your API in `config.js`
207
+ ```js
208
+ const API_KEY = 'ENTER YOUR API';
209
+ ```
210
+ 5. Change git remote url to avoid accidental pushes to base project
211
+ ```sh
212
+ git remote set-url origin github_username/repo_name
213
+ git remote -v # confirm the changes
214
+ ```
215
+
216
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
217
+
218
+
219
+
220
+ <!-- USAGE EXAMPLES -->
221
+ ## Usage
222
+
223
+ Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
224
+
225
+ _For more examples, please refer to the [Documentation](https://example.com)_
226
+
227
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
228
+
229
+
230
+
231
+ <!-- ROADMAP -->
232
+ ## Roadmap
233
+
234
+ - [ ] Feature 1
235
+ - [ ] Feature 2
236
+ - [ ] Feature 3
237
+ - [ ] Nested Feature
238
+
239
+ See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
+
241
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
+
243
+
244
+
245
+ <!-- CONTRIBUTING -->
246
+ ## Contributing
247
+
248
+ Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
249
+
250
+ If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
251
+ Don't forget to give the project a star! Thanks again!
252
+
253
+ 1. Fork the Project
254
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
255
+ 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
256
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
257
+ 5. Open a Pull Request
258
+
259
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
+
261
+ ### Top contributors:
262
+
263
+ <a href="https://github.com/github_username/repo_name/graphs/contributors">
264
+ <img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
265
+ </a>
266
+
267
+
268
+
269
+ <!-- LICENSE -->
270
+ ## License
271
+
272
+ Distributed under the project_license. See `LICENSE.txt` for more information.
273
+
274
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
275
+
276
+
277
+
278
+ <!-- CONTACT -->
279
+ ## Contact
280
+
281
+ Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
282
+
283
+ Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
284
+
285
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
286
+
287
+
288
+
289
+ <!-- ACKNOWLEDGMENTS -->
290
+ ## Acknowledgments
291
+
292
+ * []()
293
+ * []()
294
+ * []()
295
+
296
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
297
+
298
+
299
+
300
+ <!-- MARKDOWN LINKS & IMAGES -->
301
+ <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
+ [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
+ [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
+ [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
+ [forks-url]: https://github.com/github_username/repo_name/network/members
306
+ [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
+ [stars-url]: https://github.com/github_username/repo_name/stargazers
308
+ [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
+ [issues-url]: https://github.com/github_username/repo_name/issues
310
+ [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
+ [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
+ [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
+ [linkedin-url]: https://linkedin.com/in/linkedin_username
314
+ [product-screenshot]: images/screenshot.png
315
+ [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
+ <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
317
+ [Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
318
+ [Next-url]: https://nextjs.org/
319
+ [React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
320
+ [React-url]: https://reactjs.org/
321
+ [Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
322
+ [Vue-url]: https://vuejs.org/
323
+ [Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
324
+ [Angular-url]: https://angular.io/
325
+ [Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
326
+ [Svelte-url]: https://svelte.dev/
327
+ [Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
328
+ [Laravel-url]: https://laravel.com
329
+ [Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
330
+ [Bootstrap-url]: https://getbootstrap.com
331
+ [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
+ [JQuery-url]: https://jquery.com
333
+ <!-- TODO: -->
334
+ [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
+ [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
+ [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
+ [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
+ [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
+ [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
+ [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)
hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
import gradio as gr


def greet(name):
    """Return the greeting string "Hello <name>!!" for the given name."""
    return "Hello " + name + "!!"


# Module-level Interface object: Hugging Face Spaces (and the tests) need
# `demo` / `greet` importable from this module.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")

if __name__ == "__main__":
    # Launch only when executed as a script. Importing this module (e.g.
    # `from app.main import greet` in tests/test_app.py) must not start
    # the Gradio server.
    demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
name: Déployer vers Hugging Face Spaces

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Push to Hugging Face Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git config --global user.email "actions@github.com"
          git config --global user.name "GitHub Actions"
          git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
          # Exclude the clone directory itself: without this, each deploy
          # copies the previous Space checkout back into the Space, nesting
          # a new hf_space/ level on every run.
          rsync -av --exclude '.git' --exclude 'hf_space' ./ hf_space/
          cd hf_space
          git add .
          # Commit only when something actually changed, so a no-op deploy
          # does not fail the workflow (git commit exits non-zero on empty).
          git diff --cached --quiet || git commit -m "🚀 Auto-deploy from GitHub Actions"
          git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.code-workspace
2
+ .venv/
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Projet 05
3
+ emoji: đź‘€
4
+ colorFrom: indigo
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
import gradio as gr


def greet(name):
    """Return the greeting string "Hello <name>!!" for the given name."""
    return "Hello " + name + "!!"


# Module-level Interface object: Hugging Face Spaces needs `demo` importable.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")

if __name__ == "__main__":
    # Launch only when executed as a script, so importing this module for
    # tests or tooling does not start the Gradio server.
    demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from app.main import greet


def test_greet_returns_string():
    """Check that the function returns a string."""
    result = greet("Alice")
    assert isinstance(result, str), "Le résultat doit être une chaîne de caractères."


def test_greet_output_content():
    """Check that the function produces the expected sentence."""
    result = greet("Bob")
    assert result == "Hello Bob!!", f"Résultat inattendu : {result}"


def test_greet_with_empty_string():
    """Check the behaviour when the input is empty."""
    # greet("") yields "Hello " + "" + "!!" == "Hello !!" (note the space).
    result = greet("")
    assert result == "Hello !!", "Le résultat doit gérer les entrées vides."
hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/poetry.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [virtualenvs]
2
+ in-project = true
hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from projet_05 import config # noqa: F401
hf_space/hf_space/hf_space/hf_space/projet_05/config.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Project-wide configuration for projet_05: directory layout and logging.

Importing this module has side effects: it loads ``.env`` variables and,
when tqdm is available, re-routes loguru output through ``tqdm.write``.
"""

from pathlib import Path

from dotenv import load_dotenv
from loguru import logger

# Load environment variables from .env file if it exists
load_dotenv()

# Paths
# PROJ_ROOT is the repository root: one directory above this package.
PROJ_ROOT = Path(__file__).resolve().parents[1]
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")

# Data directories follow the cookiecutter-data-science layout.
DATA_DIR = PROJ_ROOT / "data"
RAW_DATA_DIR = DATA_DIR / "raw"
INTERIM_DATA_DIR = DATA_DIR / "interim"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
EXTERNAL_DATA_DIR = DATA_DIR / "external"

MODELS_DIR = PROJ_ROOT / "models"

REPORTS_DIR = PROJ_ROOT / "reports"
FIGURES_DIR = REPORTS_DIR / "figures"

# If tqdm is installed, configure loguru with tqdm.write
# https://github.com/Delgan/loguru/issues/135
try:
    from tqdm import tqdm

    # Handler 0 is loguru's default stderr sink; replacing it with
    # tqdm.write keeps log lines from breaking active progress bars.
    logger.remove(0)
    logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
except ModuleNotFoundError:
    pass
hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = RAW_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    # ----------------------------------------------
):
    """Cookiecutter stub: replace the demo loop below with real processing."""
    logger.info("Processing dataset...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Processing dataset complete.")


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/projet_05/features.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
    # -----------------------------------------
):
    """Cookiecutter stub: swap the demo loop for real feature engineering."""
    logger.info("Generating features from dataset...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Features generation complete.")


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
    # -----------------------------------------
):
    """Cookiecutter stub: replace the demo loop with real model inference."""
    logger.info("Performing inference for model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Inference complete.")


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    features_path: Path = PROCESSED_DATA_DIR / "features.csv",
    labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
    model_path: Path = MODELS_DIR / "model.pkl",
    # -----------------------------------------
):
    """Cookiecutter stub: replace the demo loop with real model training."""
    logger.info("Training some model...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Modeling training complete.")


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/projet_05/plots.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import FIGURES_DIR, PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = FIGURES_DIR / "plot.png",
    # -----------------------------------------
):
    """Cookiecutter stub: replace the demo loop with real plot generation."""
    logger.info("Generating plot from data...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Plot generation complete.")


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/pyproject.toml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "projet_05"
7
+ version = "0.0.1"
8
+ description = "D\u00e9ployez un mod\u00e8le de Machine Learning"
9
+ authors = [
10
+ { name = "St\u00e9phane Manet" },
11
+ ]
12
+ license = { file = "LICENSE" }
13
+ readme = "README.md"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License"
17
+ ]
18
+ dependencies = [
19
+ "loguru",
20
+ "mkdocs",
21
+ "pip",
22
+ "pytest",
23
+ "python-dotenv",
24
+ "ruff",
25
+ "tqdm",
26
+ "typer",
27
+ "imbalanced-learn (>=0.14.0,<0.15.0)",
28
+ "scikit-learn (>=1.4.2,<2.0.0)",
29
+ "matplotlib (>=3.10.7,<4.0.0)",
30
+ "numpy (>=2.3.4,<3.0.0)",
31
+ "pandas (>=2.3.3,<3.0.0)",
32
+ "pyyaml (>=6.0.3,<7.0.0)",
33
+ "scipy (>=1.16.3,<2.0.0)",
34
+ "seaborn (>=0.13.2,<0.14.0)",
35
+ "shap (>=0.49.1,<0.50.0)",
36
+ "gradio (>=5.49.1,<6.0.0)",
37
+ "joblib (>=1.4.2,<2.0.0)"
38
+ ]
39
+
40
+ requires-python = ">=3.11,<3.13"
41
+
42
+
43
+ [tool.ruff]
44
+ line-length = 99
45
+ src = ["projet_05"]
46
+ include = ["pyproject.toml", "projet_05/**/*.py"]
47
+
48
+ [tool.ruff.lint]
49
+ extend-select = ["I"] # Add import sorting
50
+
51
+ [tool.ruff.lint.isort]
52
+ known-first-party = ["projet_05"]
53
+ force-sort-within-sections = true
hf_space/hf_space/hf_space/hf_space/references/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/reports/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/tests/test_data.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
import pytest


def test_code_is_tested():
    """Intentional failing placeholder from the project template.

    This test fails on purpose as a reminder that the data pipeline has no
    real tests yet; replace it with actual assertions, do not delete it
    silently.
    """
    assert False
hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/projet_05/__init__.py CHANGED
@@ -1 +1,4 @@
1
  from projet_05 import config # noqa: F401
 
 
 
 
1
  from projet_05 import config # noqa: F401
2
+ from projet_05.settings import Settings, load_settings # noqa: F401
3
+
4
+ __all__ = ["config", "Settings", "load_settings"]
hf_space/hf_space/hf_space/projet_05/branding.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+ from typing import Union
6
+
7
+ from scripts_projet04.brand.brand import ( # type: ignore[import-not-found]
8
+ Theme,
9
+ ThemeConfig,
10
+ configure_brand,
11
+ load_brand,
12
+ make_diverging_cmap,
13
+ )
14
+
15
+ ROOT_DIR = Path(__file__).resolve().parents[1]
16
+ DEFAULT_BRAND_PATH = ROOT_DIR / "scripts_projet04" / "brand" / "brand.yml"
17
+
18
+
19
+ def _resolve_path(path: Union[str, Path, None]) -> Path:
20
+ if path is None:
21
+ return DEFAULT_BRAND_PATH
22
+ return Path(path).expanduser().resolve()
23
+
24
+
25
@lru_cache(maxsize=1)
def load_brand_config(path: Union[str, Path, None] = None) -> ThemeConfig:
    """Load the brand YAML once and return the parsed ThemeConfig.

    NOTE(review): ``maxsize=1`` caches only the most recently used *path*;
    alternating between two different paths re-reads the YAML on every call.
    Confirm a single brand file is the intended usage.
    """
    cfg_path = _resolve_path(path)
    return load_brand(cfg_path)
30
+
31
+
32
@lru_cache(maxsize=1)
def apply_brand_theme(path: Union[str, Path, None] = None) -> ThemeConfig:
    """
    Apply the OpenClassrooms/TechNova brand theme globally.

    Returns the ThemeConfig so callers can inspect colors if needed.

    NOTE(review): because of ``lru_cache(maxsize=1)``, repeated calls with
    the same path are no-ops after the first — the theme is applied once.
    """
    cfg_path = _resolve_path(path)
    # configure_brand parses/registers the brand config; Theme.apply() then
    # mutates global state (presumably matplotlib styling — defined in
    # scripts_projet04.brand.brand, not visible here).
    cfg = configure_brand(cfg_path)
    Theme.apply()
    return cfg
43
+
44
+
45
+ __all__ = [
46
+ "Theme",
47
+ "ThemeConfig",
48
+ "apply_brand_theme",
49
+ "load_brand_config",
50
+ "make_diverging_cmap",
51
+ "DEFAULT_BRAND_PATH",
52
+ ]
hf_space/hf_space/hf_space/projet_05/dataset.py CHANGED
@@ -1,28 +1,202 @@
 
 
 
1
  from pathlib import Path
2
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
- from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
10
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- input_path: Path = RAW_DATA_DIR / "dataset.csv",
16
- output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
17
- # ----------------------------------------------
 
 
 
 
 
 
 
 
18
  ):
19
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
- logger.info("Processing dataset...")
21
- for i in tqdm(range(10), total=10):
22
- if i == 5:
23
- logger.info("Something happened for iteration 5.")
24
- logger.success("Processing dataset complete.")
25
- # -----------------------------------------
26
 
27
 
28
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
+ import pandas as pd
8
  from loguru import logger
 
9
  import typer
10
 
11
+ from projet_05.config import INTERIM_DATA_DIR
12
+ from projet_05.settings import Settings, load_settings
13
+
14
+ app = typer.Typer(help="Préparation et fusion des données sources.")
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Utilitaires
19
+ # ---------------------------------------------------------------------------
20
+ def safe_read_csv(path: Path, *, dtype=None) -> pd.DataFrame:
21
+ """Read a CSV file and return an empty frame when it fails."""
22
+ try:
23
+ logger.info("Lecture du fichier {}", path)
24
+ return pd.read_csv(path, dtype=dtype)
25
+ except FileNotFoundError:
26
+ logger.warning("Fichier absent: {}", path)
27
+ return pd.DataFrame()
28
+ except Exception as exc: # pragma: no cover - log + empty dataframe
29
+ logger.error("Impossible de lire {} ({})", path, exc)
30
+ return pd.DataFrame()
31
+
32
+
33
+ def clean_text_values(df: pd.DataFrame) -> pd.DataFrame:
34
+ """Normalize textual values that often materialize missing values."""
35
+ replace_tokens = [
36
+ "",
37
+ " ",
38
+ " ",
39
+ " ",
40
+ "nan",
41
+ "NaN",
42
+ "NAN",
43
+ "None",
44
+ "JE ne sais pas",
45
+ "je ne sais pas",
46
+ "Je ne sais pas",
47
+ "Unknow",
48
+ "Unknown",
49
+ "non pertinent",
50
+ "Non pertinent",
51
+ "NON PERTINENT",
52
+ ]
53
+ normalized = df.copy()
54
+ normalized = normalized.replace(replace_tokens, np.nan)
55
+
56
+ for column in normalized.select_dtypes(include="object"):
57
+ normalized[column] = (
58
+ normalized[column].replace(replace_tokens, np.nan).astype("string").str.strip()
59
+ )
60
+ return normalized
61
+
62
+
63
+ def _harmonize_id_column(df: pd.DataFrame, column: str, *, digits_only: bool = True) -> pd.DataFrame:
64
+ data = df.copy()
65
+ if column not in data.columns:
66
+ return data
67
+
68
+ if digits_only:
69
+ extracted = data[column].astype(str).str.extract(r"(\\d+)")
70
+ data[column] = pd.to_numeric(extracted[0], errors="coerce")
71
+ data[column] = pd.to_numeric(data[column], errors="coerce").astype("Int64")
72
+ return data
73
+
74
+
75
+ def _rename_column(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame:
76
+ if source not in df.columns:
77
+ return df
78
+ return df.rename(columns={source: target})
79
+
80
+
81
+ def _log_id_diagnostics(df: pd.DataFrame, *, name: str, col_id: str) -> None:
82
+ if col_id not in df.columns:
83
+ logger.warning("La colonne {} est absente du fichier {}.", col_id, name)
84
+ return
85
+ total = len(df)
86
+ uniques = df[col_id].nunique(dropna=True)
87
+ duplicates = total - uniques
88
+ logger.info(
89
+ "{name}: {total} lignes | {uniques} identifiants uniques | {duplicates} doublons",
90
+ name=name,
91
+ total=total,
92
+ uniques=uniques,
93
+ duplicates=duplicates,
94
+ )
95
+
96
+
97
+ def _persist_sql_trace(df_dict: dict[str, pd.DataFrame], settings: Settings) -> pd.DataFrame:
98
+ """
99
+ Reproduire la fusion SQL décrite dans le notebook.
100
 
101
+ Chaque DataFrame est stocké dans une base SQLite éphémère pour
102
+ conserver une traçabilité de la requête exécutée.
103
+ """
104
+ db_path = settings.db_file
105
+ sql_path = settings.sql_file
106
 
107
+ db_path.parent.mkdir(parents=True, exist_ok=True)
108
+ sql_path.parent.mkdir(parents=True, exist_ok=True)
109
 
110
+ if db_path.exists():
111
+ db_path.unlink()
112
+
113
+ query = f"""
114
+ SELECT *
115
+ FROM sirh
116
+ INNER JOIN evaluation USING ({settings.col_id})
117
+ INNER JOIN sond USING ({settings.col_id});
118
+ """.strip()
119
+
120
+ with db_path.open("wb") as _:
121
+ pass # just ensure the file exists for sqlite on some platforms
122
+
123
+ with sqlite3.connect(db_path) as conn:
124
+ for name, frame in df_dict.items():
125
+ frame.to_sql(name, conn, index=False, if_exists="replace")
126
+ merged = pd.read_sql_query(query, conn)
127
+
128
+ sql_path.write_text(query, encoding="utf-8")
129
+ return merged
130
+
131
+
132
+ def build_dataset(settings: Settings) -> pd.DataFrame:
133
+ """Load, clean, harmonize and merge the three raw sources."""
134
+ sirh = clean_text_values(
135
+ safe_read_csv(settings.path_sirh).pipe(
136
+ _harmonize_id_column, settings.col_id, digits_only=True
137
+ )
138
+ )
139
+ evaluation = clean_text_values(
140
+ safe_read_csv(settings.path_eval)
141
+ .pipe(_rename_column, "eval_number", settings.col_id)
142
+ .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
143
+ )
144
+ sond = clean_text_values(
145
+ safe_read_csv(settings.path_sondage)
146
+ .pipe(_rename_column, "code_sondage", settings.col_id)
147
+ .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
148
+ )
149
+
150
+ for name, frame in {"sirh": sirh, "evaluation": evaluation, "sond": sond}.items():
151
+ _log_id_diagnostics(frame, name=name, col_id=settings.col_id)
152
+
153
+ frames = {
154
+ "sirh": sirh,
155
+ "evaluation": evaluation,
156
+ "sond": sond,
157
+ }
158
+ merged = _persist_sql_trace(frames, settings)
159
+
160
+ missing_cols = [settings.col_id] if settings.col_id not in merged.columns else []
161
+ if missing_cols:
162
+ raise KeyError(
163
+ f"La colonne {settings.col_id} est absente de la fusion finale. "
164
+ "Vérifiez vos fichiers sources."
165
+ )
166
+
167
+ logger.success("Fusion réalisée: {} lignes / {} colonnes", *merged.shape)
168
+ return merged
169
+
170
+
171
+ def save_dataset(df: pd.DataFrame, output_path: Path) -> None:
172
+ output_path.parent.mkdir(parents=True, exist_ok=True)
173
+ df.to_csv(output_path, index=False)
174
+ logger.success("Fichier fusionné sauvegardé dans {}", output_path)
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # CLI
179
+ # ---------------------------------------------------------------------------
180
  @app.command()
181
  def main(
182
+ settings_path: Path = typer.Option(
183
+ None,
184
+ "--settings",
185
+ "-s",
186
+ help="Chemin vers un fichier settings.yml personnalisé.",
187
+ ),
188
+ output_path: Path = typer.Option(
189
+ INTERIM_DATA_DIR / "merged.csv",
190
+ "--output",
191
+ "-o",
192
+ help="Chemin de sortie du dataset fusionné.",
193
+ ),
194
  ):
195
+ """Entrypoint Typer pour reproduire la fusion des données brutes."""
196
+
197
+ settings = load_settings(settings_path) if settings_path else load_settings()
198
+ df = build_dataset(settings)
199
+ save_dataset(df, output_path)
 
 
200
 
201
 
202
  if __name__ == "__main__":
hf_space/hf_space/hf_space/projet_05/explainability.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Tuple
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from loguru import logger
9
+
10
+ from projet_05.branding import Theme, apply_brand_theme, make_diverging_cmap
11
+ from scripts_projet04.manet_projet04.shap_generator import ( # type: ignore[import-not-found]
12
+ shap_global,
13
+ shap_local,
14
+ )
15
+
16
+ apply_brand_theme()
17
+
18
+
19
+ def _shape_array(values) -> np.ndarray:
20
+ if hasattr(values, "values"):
21
+ arr = np.array(values.values)
22
+ else:
23
+ arr = np.array(values)
24
+ return np.nan_to_num(arr, copy=False)
25
+
26
+
27
+ def compute_shap_summary(
28
+ pipeline,
29
+ X: pd.DataFrame,
30
+ y: pd.Series,
31
+ *,
32
+ max_samples: int = 500,
33
+ ) -> Tuple[pd.DataFrame | None, object | None]:
34
+ """
35
+ Reuse the historical `shap_global` helper to build the plots and a tabular summary.
36
+
37
+ Returns
38
+ -------
39
+ summary_df : pd.DataFrame | None
40
+ Moyenne absolue des valeurs SHAP (ordre décroissant).
41
+ shap_values : shap.Explanation | None
42
+ Objet renvoyé par shap_global pour des analyses locales ultérieures.
43
+ """
44
+ cmap = make_diverging_cmap(Theme.PRIMARY, Theme.SECONDARY)
45
+ shap_values, _, feature_names = shap_global(
46
+ pipeline,
47
+ X,
48
+ y,
49
+ sample_size=max_samples,
50
+ cmap=cmap,
51
+ )
52
+ if shap_values is None or feature_names is None:
53
+ logger.warning("Impossible de générer les résumés SHAP.")
54
+ return None, None
55
+
56
+ shap_array = _shape_array(shap_values)
57
+ if shap_array.ndim == 1:
58
+ shap_array = shap_array.reshape(-1, 1)
59
+ mean_abs = np.abs(shap_array).mean(axis=0)
60
+ summary = (
61
+ pd.DataFrame({"feature": list(feature_names), "mean_abs_shap": mean_abs})
62
+ .sort_values("mean_abs_shap", ascending=False)
63
+ .reset_index(drop=True)
64
+ )
65
+ return summary, shap_values
66
+
67
+
68
+ def save_shap_summary(summary: pd.DataFrame, output_path: Path) -> None:
69
+ output_path.parent.mkdir(parents=True, exist_ok=True)
70
+ summary.to_csv(output_path, index=False)
71
+ logger.info("Résumé SHAP sauvegardé dans {}", output_path)
72
+
73
+
74
+ def export_local_explanations(
75
+ pipeline,
76
+ shap_values,
77
+ X: pd.DataFrame,
78
+ custom_index: int | None = None,
79
+ ) -> None:
80
+ """
81
+ Génère trois cas d'usage par défaut (impact max, risque max, risque min)
82
+ et un indice custom optionnel pour la trace historique.
83
+ """
84
+ if shap_values is None:
85
+ return
86
+
87
+ shap_array = _shape_array(shap_values)
88
+ idx_impact = int(np.argmax(np.sum(np.abs(shap_array), axis=1)))
89
+ shap_local(idx_impact, shap_values)
90
+
91
+ y_proba_all = pipeline.predict_proba(X)[:, 1]
92
+ idx_highrisk = int(np.argmax(y_proba_all))
93
+ shap_local(idx_highrisk, shap_values)
94
+
95
+ idx_lowrisk = int(np.argmin(y_proba_all))
96
+ shap_local(idx_lowrisk, shap_values, text_scale=0.6)
97
+
98
+ if custom_index is not None:
99
+ shap_local(custom_index, shap_values, max_display=8)
100
+
101
+
102
+ __all__ = ["compute_shap_summary", "save_shap_summary", "export_local_explanations"]
hf_space/hf_space/hf_space/projet_05/features.py CHANGED
@@ -1,28 +1,170 @@
 
 
 
 
1
  from pathlib import Path
2
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
- from projet_05.config import PROCESSED_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
16
- output_path: Path = PROCESSED_DATA_DIR / "features.csv",
17
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ):
19
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
- logger.info("Generating features from dataset...")
21
- for i in tqdm(range(10), total=10):
22
- if i == 5:
23
- logger.info("Something happened for iteration 5.")
24
- logger.success("Features generation complete.")
25
- # -----------------------------------------
26
 
27
 
28
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime
5
  from pathlib import Path
6
 
7
+ import numpy as np
8
+ import pandas as pd
9
  from loguru import logger
 
10
  import typer
11
 
12
+ from projet_05.config import INTERIM_DATA_DIR, PROCESSED_DATA_DIR
13
+ from projet_05.settings import Settings, load_settings
14
+
15
+ app = typer.Typer(help="Génération des features et nettoyage de la cible.")
16
+
17
+ TARGET_MAPPING = {
18
+ "1": 1,
19
+ "0": 0,
20
+ "oui": 1,
21
+ "non": 0,
22
+ "true": 1,
23
+ "false": 0,
24
+ "quitte": 1,
25
+ "reste": 0,
26
+ "yes": 1,
27
+ "no": 0,
28
+ }
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Utilitaires cœur de pipeline
33
+ # ---------------------------------------------------------------------------
34
+ def _load_merged_dataset(path: Path) -> pd.DataFrame:
35
+ if not path.exists():
36
+ raise FileNotFoundError(
37
+ f"Le fichier fusionné {path} est introuvable. Lancez `python projet_05/dataset.py` d'abord."
38
+ )
39
+ logger.info("Chargement du dataset fusionné depuis {}", path)
40
+ return pd.read_csv(path)
41
+
42
+
43
+ def _normalize_target(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
44
+ if settings.target not in df.columns:
45
+ raise KeyError(f"La variable cible '{settings.target}' est absente du fichier.")
46
+
47
+ normalized = (
48
+ df[settings.target]
49
+ .astype(str)
50
+ .str.strip()
51
+ .str.lower()
52
+ .map(TARGET_MAPPING)
53
+ )
54
+ df = df.copy()
55
+ df[settings.target] = normalized
56
+ before = len(df)
57
+ df = df[df[settings.target].isin([0, 1])].copy()
58
+ dropped = before - len(df)
59
+ if dropped:
60
+ logger.warning("Suppression de {} lignes avec une cible invalide.", dropped)
61
+ df[settings.target] = df[settings.target].astype(int)
62
+ return df
63
+
64
+
65
+ def _safe_ratio(df: pd.DataFrame, numerator: str, denominator: str, output: str) -> None:
66
+ if numerator not in df.columns or denominator not in df.columns:
67
+ return
68
+ denominator_series = df[denominator].replace({0: np.nan})
69
+ df[output] = df[numerator] / denominator_series
70
+
71
+
72
+ def _engineer_features(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
73
+ engineered = df.copy()
74
 
75
+ col = "augementation_salaire_precedente"
76
+ if col in engineered:
77
+ engineered[col] = (
78
+ engineered[col]
79
+ .astype(str)
80
+ .str.replace("%", "", regex=False)
81
+ .str.replace(",", ".", regex=False)
82
+ .str.strip()
83
+ )
84
+ engineered[col] = pd.to_numeric(engineered[col], errors="coerce") / 100
85
 
86
+ _safe_ratio(engineered, "augementation_salaire_precedente", "revenu_mensuel", "augmentation_par_revenu")
87
+ _safe_ratio(engineered, "annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
88
+ _safe_ratio(engineered, "nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
89
+ _safe_ratio(
90
+ engineered, "annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience"
91
+ )
92
 
93
+ if settings.sat_cols:
94
+ existing = [col for col in settings.sat_cols if col in engineered.columns]
95
+ if existing:
96
+ engineered["score_moyen_satisfaction"] = engineered[existing].mean(axis=1)
97
+
98
+ if "note_evaluation_actuelle" in engineered.columns and "note_evaluation_precedente" in engineered.columns:
99
+ engineered["evolution_note"] = (
100
+ engineered["note_evaluation_actuelle"] - engineered["note_evaluation_precedente"]
101
+ )
102
+
103
+ return engineered
104
+
105
+
106
+ def build_features(settings: Settings, *, input_path: Path) -> pd.DataFrame:
107
+ df = _load_merged_dataset(input_path)
108
+ df = _normalize_target(df, settings)
109
+ df = _engineer_features(df, settings)
110
+ return df
111
+
112
+
113
+ def save_features(df: pd.DataFrame, output_path: Path) -> None:
114
+ output_path.parent.mkdir(parents=True, exist_ok=True)
115
+ df.to_csv(output_path, index=False)
116
+ logger.success("Dataset enrichi sauvegardé dans {}", output_path)
117
+
118
+
119
+ def save_schema(settings: Settings, output_path: Path) -> None:
120
+ schema = {
121
+ "target": settings.target,
122
+ "col_id": settings.col_id,
123
+ "numerical_features": list(settings.num_cols),
124
+ "categorical_features": list(settings.cat_cols),
125
+ "satisfaction_features": list(settings.sat_cols),
126
+ "generated_at": datetime.utcnow().isoformat(),
127
+ }
128
+ output_path.parent.mkdir(parents=True, exist_ok=True)
129
+ output_path.write_text(json.dumps(schema, indent=2), encoding="utf-8")
130
+ logger.info("Schéma sauvegardé dans {}", output_path)
131
+
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # CLI
135
+ # ---------------------------------------------------------------------------
136
  @app.command()
137
  def main(
138
+ settings_path: Path = typer.Option(
139
+ None,
140
+ "--settings",
141
+ "-s",
142
+ help="Chemin optionnel vers un fichier settings.yml personnalisé.",
143
+ ),
144
+ input_path: Path = typer.Option(
145
+ INTERIM_DATA_DIR / "merged.csv",
146
+ "--input",
147
+ "-i",
148
+ help="Chemin du fichier issu de la fusion.",
149
+ ),
150
+ output_path: Path = typer.Option(
151
+ PROCESSED_DATA_DIR / "dataset.csv",
152
+ "--output",
153
+ "-o",
154
+ help="Chemin du fichier enrichi.",
155
+ ),
156
+ schema_path: Path = typer.Option(
157
+ PROCESSED_DATA_DIR / "schema.json",
158
+ "--schema",
159
+ help="Chemin de sauvegarde du schéma de features.",
160
+ ),
161
  ):
162
+ """Pipeline Typer pour préparer le dataset enrichi."""
163
+
164
+ settings = load_settings(settings_path) if settings_path else load_settings()
165
+ df = build_features(settings, input_path=input_path)
166
+ save_features(df, output_path)
167
+ save_schema(settings, schema_path)
 
168
 
169
 
170
  if __name__ == "__main__":
hf_space/hf_space/hf_space/projet_05/modeling/predict.py CHANGED
@@ -1,29 +1,99 @@
 
 
 
1
  from pathlib import Path
2
 
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
  from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
8
 
9
- app = typer.Typer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
16
- model_path: Path = MODELS_DIR / "model.pkl",
17
- predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
18
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ):
20
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
- logger.info("Performing inference for model...")
22
- for i in tqdm(range(10), total=10):
23
- if i == 5:
24
- logger.info("Something happened for iteration 5.")
25
- logger.success("Inference complete.")
26
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
+ import pandas as pd
8
+ from joblib import load
9
  from loguru import logger
 
10
  import typer
11
 
12
  from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
13
 
14
+ app = typer.Typer(help="Inférence à partir du pipeline entraîné.")
15
+
16
+
17
+ def load_pipeline(model_path: Path):
18
+ if not model_path.exists():
19
+ raise FileNotFoundError(f"Modèle introuvable: {model_path}")
20
+ logger.info("Chargement du modèle {}", model_path)
21
+ return load(model_path)
22
+
23
+
24
+ def load_metadata(metadata_path: Path) -> dict:
25
+ if not metadata_path.exists():
26
+ raise FileNotFoundError(f"Fichier métadonnées introuvable: {metadata_path}")
27
+ return json.loads(metadata_path.read_text(encoding="utf-8"))
28
+
29
+
30
+ def run_inference(
31
+ df: pd.DataFrame,
32
+ pipeline,
33
+ threshold: float,
34
+ drop_columns: list[str] | None = None,
35
+ required_features: list[str] | None = None,
36
+ ) -> pd.DataFrame:
37
+ features = df.drop(columns=drop_columns or [], errors="ignore")
38
+ if required_features:
39
+ for col in required_features:
40
+ if col not in features.columns:
41
+ features[col] = np.nan
42
+ features = features[required_features]
43
+ proba = pipeline.predict_proba(features)[:, 1]
44
+ predictions = (proba >= threshold).astype(int)
45
+ output = df.copy()
46
+ output["proba_depart"] = proba
47
+ output["prediction"] = predictions
48
+ return output
49
 
50
 
51
  @app.command()
52
  def main(
53
+ model_path: Path = typer.Option(
54
+ MODELS_DIR / "best_model.joblib",
55
+ "--model-path",
56
+ help="Pipeline entraîné sauvegardé via train.py",
57
+ ),
58
+ metadata_path: Path = typer.Option(
59
+ MODELS_DIR / "best_model_meta.json",
60
+ "--metadata-path",
61
+ help="Fichier JSON contenant le seuil optimal.",
62
+ ),
63
+ features_path: Path = typer.Option(
64
+ PROCESSED_DATA_DIR / "dataset.csv",
65
+ "--features",
66
+ "-f",
67
+ help="Jeu de features sur lequel produire des prédictions.",
68
+ ),
69
+ predictions_path: Path = typer.Option(
70
+ PROCESSED_DATA_DIR / "predictions.csv",
71
+ "--output",
72
+ "-o",
73
+ help="Chemin de sauvegarde des prédictions.",
74
+ ),
75
  ):
76
+ """Entrypoint Typer pour générer un fichier de prédictions."""
77
+
78
+ pipeline = load_pipeline(model_path)
79
+ metadata = load_metadata(metadata_path)
80
+ threshold = metadata.get("best_threshold", 0.5)
81
+ features_cfg = metadata.get("features", {})
82
+ required_features = (features_cfg.get("numerical") or []) + (features_cfg.get("categorical") or [])
83
+ df = pd.read_csv(features_path)
84
+ logger.info("Dataset chargé: {} lignes", len(df))
85
+
86
+ target_col = metadata.get("target")
87
+ predictions = run_inference(
88
+ df,
89
+ pipeline,
90
+ threshold,
91
+ drop_columns=[target_col] if target_col else None,
92
+ required_features=required_features or None,
93
+ )
94
+ predictions_path.parent.mkdir(parents=True, exist_ok=True)
95
+ predictions.to_csv(predictions_path, index=False)
96
+ logger.success("Prédictions sauvegardées dans {}", predictions_path)
97
 
98
 
99
  if __name__ == "__main__":
hf_space/hf_space/hf_space/projet_05/modeling/train.py CHANGED
@@ -1,29 +1,342 @@
 
 
 
 
1
  from pathlib import Path
 
2
 
 
 
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import typer
6
 
7
- from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- features_path: Path = PROCESSED_DATA_DIR / "features.csv",
16
- labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
17
- model_path: Path = MODELS_DIR / "model.pkl",
18
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ):
20
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
- logger.info("Training some model...")
22
- for i in tqdm(range(10), total=10):
23
- if i == 5:
24
- logger.info("Something happened for iteration 5.")
25
- logger.success("Modeling training complete.")
26
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
  from pathlib import Path
6
+ from typing import Dict, Tuple
7
 
8
+ import numpy as np
9
+ import pandas as pd
10
+ from imblearn.over_sampling import SMOTE
11
+ from imblearn.pipeline import Pipeline as ImbPipeline
12
+ from joblib import dump
13
  from loguru import logger
14
+ from sklearn.base import clone
15
+ from sklearn.compose import ColumnTransformer
16
+ from sklearn.ensemble import RandomForestClassifier
17
+ from sklearn.impute import SimpleImputer
18
+ from sklearn.linear_model import LogisticRegression
19
+ from sklearn.metrics import (
20
+ f1_score,
21
+ precision_recall_curve,
22
+ precision_score,
23
+ recall_score,
24
+ roc_auc_score,
25
+ )
26
+ from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_predict
27
+ from sklearn.pipeline import Pipeline
28
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
29
  import typer
30
 
31
+ from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR, REPORTS_DIR
32
+ from projet_05.explainability import (
33
+ compute_shap_summary,
34
+ export_local_explanations,
35
+ save_shap_summary,
36
+ )
37
+ from projet_05.settings import Settings, load_settings
38
+
39
+ app = typer.Typer(help="Entraînement et sélection du meilleur modèle.")
40
+
41
+
42
+ def _clean_values(payload: dict) -> dict:
43
+ def _convert(value):
44
+ if isinstance(value, (np.floating, np.integer)):
45
+ return value.item()
46
+ return value
47
+
48
+ return {key: _convert(value) for key, value in payload.items()}
49
+
50
+
51
+ @dataclass
52
+ class ModelResult:
53
+ name: str
54
+ best_estimator: ImbPipeline
55
+ best_params: dict
56
+ best_threshold: float
57
+ metrics: Dict[str, float]
58
+
59
+
60
+ def load_processed_dataset(path: Path) -> pd.DataFrame:
61
+ if not path.exists():
62
+ raise FileNotFoundError(
63
+ f"Dataset traité introuvable ({path}). Lancez `python projet_05/features.py`."
64
+ )
65
+ logger.info("Chargement du dataset préparé depuis {}", path)
66
+ return pd.read_csv(path)
67
+
68
+
69
+ def split_features_target(df: pd.DataFrame, settings: Settings) -> Tuple[pd.DataFrame, pd.Series]:
70
+ if settings.target not in df.columns:
71
+ raise KeyError(f"La cible {settings.target} est absente du dataset.")
72
+ y = df[settings.target].astype(int)
73
+ drop_cols = [settings.target]
74
+ if settings.col_id in df.columns:
75
+ drop_cols.append(settings.col_id)
76
+ X = df.drop(columns=drop_cols, errors="ignore")
77
+ return X, y
78
+
79
+
80
+ def build_preprocessor(settings: Settings, X: pd.DataFrame) -> ColumnTransformer:
81
+ numeric_features = [col for col in settings.num_cols if col in X.columns]
82
+ categorical_features = [col for col in settings.cat_cols if col in X.columns]
83
+ if not numeric_features:
84
+ numeric_features = X.select_dtypes(include="number").columns.tolist()
85
+ if not categorical_features:
86
+ categorical_features = X.select_dtypes(exclude="number").columns.tolist()
87
+
88
+ numeric_transformer = Pipeline(
89
+ steps=[
90
+ ("imputer", SimpleImputer(strategy="median")),
91
+ ("scaler", StandardScaler()),
92
+ ]
93
+ )
94
+ categorical_transformer = Pipeline(
95
+ steps=[
96
+ ("imputer", SimpleImputer(strategy="most_frequent")),
97
+ ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
98
+ ]
99
+ )
100
+ transformers = []
101
+ if numeric_features:
102
+ transformers.append(("num", numeric_transformer, numeric_features))
103
+ if categorical_features:
104
+ transformers.append(("cat", categorical_transformer, categorical_features))
105
+ if not transformers:
106
+ raise ValueError("Aucune feature disponible pour l'entraînement.")
107
+ return ColumnTransformer(transformers=transformers)
108
+
109
+
110
+ def get_models(random_state: int):
111
+ return {
112
+ "LogReg_balanced": (
113
+ LogisticRegression(
114
+ max_iter=2000,
115
+ class_weight="balanced",
116
+ random_state=random_state,
117
+ ),
118
+ [
119
+ {
120
+ "clf__solver": ["lbfgs"],
121
+ "clf__penalty": ["l2"],
122
+ "clf__C": [0.1, 1.0, 10.0],
123
+ },
124
+ {
125
+ "clf__solver": ["liblinear"],
126
+ "clf__penalty": ["l1", "l2"],
127
+ "clf__C": [0.1, 1.0, 10.0],
128
+ },
129
+ ],
130
+ ),
131
+ "RF_balanced": (
132
+ RandomForestClassifier(
133
+ n_estimators=300,
134
+ max_depth=8,
135
+ min_samples_split=10,
136
+ min_samples_leaf=5,
137
+ class_weight="balanced_subsample",
138
+ random_state=random_state,
139
+ ),
140
+ {
141
+ "clf__n_estimators": [200, 300, 500],
142
+ "clf__max_depth": [6, 8, 10],
143
+ "clf__min_samples_split": [5, 10, 15],
144
+ "clf__min_samples_leaf": [2, 5, 8],
145
+ },
146
+ ),
147
+ }
148
+
149
 
150
+ def _compute_best_threshold(y_true, y_proba):
151
+ precision, recall, thresholds = precision_recall_curve(y_true, y_proba)
152
+ f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)
153
+ best_idx = np.nanargmax(f1_scores)
154
+ if thresholds.size == 0:
155
+ return 0.5
156
+ best_idx = min(best_idx, thresholds.size - 1)
157
+ return thresholds[best_idx]
158
+
159
+
160
+ def evaluate_models(X, y, settings: Settings, preprocessor: ColumnTransformer) -> list[ModelResult]:
161
+ cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=settings.random_state)
162
+ results: list[ModelResult] = []
163
+
164
+ for name, (model, grid) in get_models(settings.random_state).items():
165
+ logger.info("Entraînement du modèle {}", name)
166
+ pipe = ImbPipeline(
167
+ steps=[
168
+ ("prep", preprocessor),
169
+ ("smote", SMOTE(random_state=settings.random_state)),
170
+ ("clf", model),
171
+ ]
172
+ )
173
+ search = GridSearchCV(
174
+ estimator=pipe,
175
+ param_grid=grid,
176
+ cv=cv,
177
+ scoring="f1",
178
+ n_jobs=-1,
179
+ )
180
+ search.fit(X, y)
181
+ best_pipe = search.best_estimator_
182
+
183
+ y_proba = cross_val_predict(best_pipe, X, y, cv=cv, method="predict_proba")[:, 1]
184
+ threshold = _compute_best_threshold(y, y_proba)
185
+ y_pred = (y_proba >= threshold).astype(int)
186
+
187
+ metrics = {
188
+ "f1": f1_score(y, y_pred),
189
+ "recall": recall_score(y, y_pred),
190
+ "precision": precision_score(y, y_pred),
191
+ "roc_auc": roc_auc_score(y, y_proba),
192
+ }
193
+ logger.info("Scores {} -> {}", name, metrics)
194
+ results.append(
195
+ ModelResult(
196
+ name=name,
197
+ best_estimator=best_pipe,
198
+ best_params=search.best_params_,
199
+ best_threshold=threshold,
200
+ metrics=metrics,
201
+ )
202
+ )
203
+ return results
204
+
205
+
206
+ def compute_dummy_baseline(y: pd.Series) -> dict:
207
+ majority = int(y.mode().iloc[0])
208
+ y_pred = np.full_like(y, fill_value=majority)
209
+ return {
210
+ "strategy": "most_frequent",
211
+ "majority_class": majority,
212
+ "f1": f1_score(y, y_pred),
213
+ "recall": recall_score(y, y_pred),
214
+ "precision": precision_score(y, y_pred, zero_division=0),
215
+ "roc_auc": 0.5,
216
+ }
217
+
218
+
219
+ def fit_final_pipeline(
220
+ best_result: ModelResult,
221
+ X: pd.DataFrame,
222
+ y: pd.Series,
223
+ settings: Settings,
224
+ ):
225
+ sm = SMOTE(random_state=settings.random_state)
226
+ X_bal, y_bal = sm.fit_resample(X, y)
227
+ final_preprocessor = build_preprocessor(settings, X)
228
+ clf = clone(best_result.best_estimator.named_steps["clf"])
229
+ final_pipe = Pipeline(
230
+ steps=[
231
+ ("prep", final_preprocessor),
232
+ ("clf", clf),
233
+ ]
234
+ )
235
+ final_pipe.fit(X_bal, y_bal)
236
+ logger.success(
237
+ "Modèle {} ré-entraîné sur {} lignes équilibrées.", best_result.name, len(X_bal)
238
+ )
239
+ return final_pipe
240
+
241
+
242
+ def save_artifacts(
243
+ pipeline: Pipeline,
244
+ results: list[ModelResult],
245
+ best_result: ModelResult,
246
+ baseline: dict,
247
+ settings: Settings,
248
+ model_path: Path,
249
+ metadata_path: Path,
250
+ shap_path: Path,
251
+ X: pd.DataFrame,
252
+ y: pd.Series,
253
+ ):
254
+ model_path.parent.mkdir(parents=True, exist_ok=True)
255
+ dump(pipeline, model_path)
256
+ logger.success("Pipeline sauvegardé dans {}", model_path)
257
+
258
+ metadata = {
259
+ "best_model": best_result.name,
260
+ "best_threshold": float(best_result.best_threshold),
261
+ "best_params": best_result.best_params,
262
+ "metrics": _clean_values(best_result.metrics),
263
+ "all_results": [
264
+ {
265
+ "model": r.name,
266
+ "metrics": _clean_values(r.metrics),
267
+ "best_threshold": float(r.best_threshold),
268
+ "best_params": r.best_params,
269
+ }
270
+ for r in results
271
+ ],
272
+ "baseline": _clean_values(baseline),
273
+ "features": {
274
+ "numerical": list(settings.num_cols),
275
+ "categorical": list(settings.cat_cols),
276
+ },
277
+ "target": settings.target,
278
+ }
279
+ metadata_path.parent.mkdir(parents=True, exist_ok=True)
280
+ metadata_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
281
+ logger.info("Métadonnées sauvegardées dans {}", metadata_path)
282
+
283
+ shap_summary, shap_values = compute_shap_summary(pipeline, X, y)
284
+ if shap_summary is not None:
285
+ save_shap_summary(shap_summary, shap_path)
286
+ export_local_explanations(pipeline, shap_values, X)
287
 
288
 
289
  @app.command()
290
  def main(
291
+ settings_path: Path = typer.Option(None, "--settings", "-s", help="Chemin alternatif vers settings.yml."),
292
+ input_path: Path = typer.Option(
293
+ PROCESSED_DATA_DIR / "dataset.csv",
294
+ "--input",
295
+ "-i",
296
+ help="Dataset enrichi issu de projet_05/features.py",
297
+ ),
298
+ model_path: Path = typer.Option(
299
+ MODELS_DIR / "best_model.joblib",
300
+ "--model-path",
301
+ help="Chemin de sauvegarde du pipeline entraîné.",
302
+ ),
303
+ metadata_path: Path = typer.Option(
304
+ MODELS_DIR / "best_model_meta.json",
305
+ "--metadata-path",
306
+ help="Chemin de sauvegarde des métriques et métadonnées.",
307
+ ),
308
+ shap_path: Path = typer.Option(
309
+ REPORTS_DIR / "shap_summary.csv",
310
+ "--shap-path",
311
+ help="Chemin de sortie du résumé SHAP.",
312
+ ),
313
  ):
314
+ """Script principal pour lancer l'entraînement complet."""
315
+
316
+ settings = load_settings(settings_path) if settings_path else load_settings()
317
+ df = load_processed_dataset(input_path)
318
+ X, y = split_features_target(df, settings)
319
+ preprocessor = build_preprocessor(settings, X)
320
+ results = evaluate_models(X, y, settings, preprocessor)
321
+ if not results:
322
+ raise RuntimeError("Aucun modèle évalué. Vérifiez la configuration.")
323
+ best_result = max(results, key=lambda r: r.metrics["f1"])
324
+ baseline = compute_dummy_baseline(y)
325
+ logger.info("Baseline Dummy -> {}", baseline)
326
+
327
+ final_pipeline = fit_final_pipeline(best_result, X, y, settings)
328
+ save_artifacts(
329
+ final_pipeline,
330
+ results,
331
+ best_result,
332
+ baseline,
333
+ settings,
334
+ model_path,
335
+ metadata_path,
336
+ shap_path,
337
+ X,
338
+ y,
339
+ )
340
 
341
 
342
  if __name__ == "__main__":
hf_space/hf_space/hf_space/projet_05/settings.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from functools import lru_cache
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Iterable
8
+
9
+ import yaml
10
+
11
+ DEFAULT_SETTINGS_PATH = Path(__file__).with_name("settings.yml")
12
+
13
+
14
@dataclass(frozen=True)
class Settings:
    """Immutable project configuration, normally built by :func:`load_settings`."""

    random_state: int = 42
    path_sirh: Path = field(default_factory=lambda: Path("data/raw/sirh.csv"))
    path_eval: Path = field(default_factory=lambda: Path("data/raw/evaluation.csv"))
    path_sondage: Path = field(default_factory=lambda: Path("data/raw/sondage.csv"))
    col_id: str = "id_employee"
    target: str = "a_quitte_l_entreprise"
    num_cols: tuple[str, ...] = ()
    cat_cols: tuple[str, ...] = ()
    sat_cols: tuple[str, ...] = ()
    first_vars: tuple[str, ...] = ()
    subsample_frac: float = 1.0
    sql_file: Path = field(default_factory=lambda: Path("merge_sql.sql"))
    db_file: Path = field(default_factory=lambda: Path("merge_temp.db"))

    def as_dict(self) -> dict:
        """Return a serializable representation (useful for logging/tests).

        Paths are rendered as ``str`` and column tuples as ``list`` so the
        result is directly JSON/YAML friendly.
        """
        serialized: dict = {"random_state": self.random_state}
        # Raw-data paths, stringified for serialization.
        for attr in ("path_sirh", "path_eval", "path_sondage"):
            serialized[attr] = str(getattr(self, attr))
        serialized["col_id"] = self.col_id
        serialized["target"] = self.target
        # Column groups become plain lists.
        for attr in ("num_cols", "cat_cols", "sat_cols", "first_vars"):
            serialized[attr] = list(getattr(self, attr))
        serialized["subsample_frac"] = self.subsample_frac
        serialized["sql_file"] = str(self.sql_file)
        serialized["db_file"] = str(self.db_file)
        return serialized
+
48
+
49
+ def _ensure_iterable(values: Iterable[str] | None, *, field_name: str) -> tuple[str, ...]:
50
+ if values is None:
51
+ return ()
52
+ if isinstance(values, str):
53
+ msg = f"'{field_name}' doit être une liste et non une chaîne isolée."
54
+ raise TypeError(msg)
55
+ return tuple(v for v in values if v)
56
+
57
+
58
+ def _resolve_path(candidate: str | os.PathLike[str] | None, *, base_dir: Path) -> Path:
59
+ if not candidate:
60
+ raise ValueError("Aucun chemin n'a été fourni dans le fichier de configuration.")
61
+ resolved = Path(candidate)
62
+ if not resolved.is_absolute():
63
+ resolved = (base_dir / resolved).resolve()
64
+ return resolved
65
+
66
+
67
def _load_raw_settings(path: Path) -> dict:
    """Parse *path* as YAML and require the top-level node to be a mapping.

    An empty file yields an empty dict rather than ``None``.
    """
    raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    if not isinstance(raw, dict):
        raise ValueError(f"Le fichier de configuration {path} doit contenir un dictionnaire YAML.")
    return raw
73
+
74
+
75
@lru_cache
def load_settings(custom_path: str | os.PathLike[str] | None = None) -> Settings:
    """Load the project configuration from a YAML file.

    Lookup order:
    1. The ``custom_path`` argument, when provided.
    2. The ``PROJET05_SETTINGS`` environment variable.
    3. The default file ``projet_05/settings.yml``.

    NOTE: results are memoized per path (``lru_cache``); changes to the
    file or to the environment variable after the first call for a given
    path are not picked up.

    Raises
    ------
    FileNotFoundError
        If the resolved configuration file does not exist.
    """
    env_path = os.environ.get("PROJET05_SETTINGS")
    raw_path = Path(custom_path or env_path or DEFAULT_SETTINGS_PATH)

    if not raw_path.exists():
        raise FileNotFoundError(
            f"Fichier de configuration introuvable : {raw_path}. "
            "Initialisez-le depuis projet_05/settings.yml ou indiquez PROJET05_SETTINGS."
        )

    base_dir = raw_path.parent
    payload = _load_raw_settings(raw_path)
    # Robustness: a bare `paths:` key in the YAML parses to None, which
    # would crash on `.get()`; treat it like a missing block.
    paths_block = payload.get("paths") or {}
    # Single instance for default fallbacks instead of re-instantiating
    # Settings() for every field.
    defaults = Settings()

    settings = Settings(
        random_state=int(payload.get("random_state", defaults.random_state)),
        path_sirh=_resolve_path(paths_block.get("sirh", defaults.path_sirh), base_dir=base_dir),
        path_eval=_resolve_path(paths_block.get("evaluation", defaults.path_eval), base_dir=base_dir),
        path_sondage=_resolve_path(paths_block.get("sondage", defaults.path_sondage), base_dir=base_dir),
        col_id=payload.get("col_id", defaults.col_id),
        target=payload.get("target", defaults.target),
        num_cols=_ensure_iterable(payload.get("num_cols"), field_name="num_cols"),
        cat_cols=_ensure_iterable(payload.get("cat_cols"), field_name="cat_cols"),
        sat_cols=_ensure_iterable(payload.get("sat_cols"), field_name="sat_cols"),
        first_vars=_ensure_iterable(payload.get("first_vars"), field_name="first_vars"),
        subsample_frac=float(payload.get("subsample_frac", defaults.subsample_frac)),
        sql_file=_resolve_path(paths_block.get("sql_file", defaults.sql_file), base_dir=base_dir),
        db_file=_resolve_path(paths_block.get("db_file", defaults.db_file), base_dir=base_dir),
    )
    return settings
hf_space/hf_space/hf_space/projet_05/settings.yml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ random_state: 42
2
+ col_id: id_employee
3
+ target: a_quitte_l_entreprise
4
+ subsample_frac: 0.5
5
+
6
+ paths:
7
+ sirh: ../data/raw/sirh.csv
8
+ evaluation: ../data/raw/evaluation.csv
9
+ sondage: ../data/raw/sondage.csv
10
+ sql_file: ../reports/merge_sql.sql
11
+ db_file: ../data/interim/merge_temp.db
12
+
13
+ num_cols:
14
+ - age
15
+ - revenu_mensuel
16
+ - annees_dans_l_entreprise
17
+ - annees_dans_le_poste_actuel
18
+ - annees_depuis_la_derniere_promotion
19
+ - distance_domicile_travail
20
+ - nombre_participation_pee
21
+ - note_evaluation_actuelle
22
+ - note_evaluation_precedente
23
+ - annees_depuis_le_changement_deposte
24
+ - annee_experience_totale
25
+ - nb_formations_suivies
26
+ - satisfaction_employee_environnement
27
+ - satisfaction_employee_nature_travail
28
+ - satisfaction_employee_equipe
29
+ - satisfaction_employee_equilibre_pro_perso
30
+ - augmentation_par_revenu
31
+ - annee_sur_poste_par_experience
32
+ - nb_formation_par_experience
33
+ - score_moyen_satisfaction
34
+ - dern_promo_par_experience
35
+ - evolution_note
36
+
37
+ cat_cols:
38
+ - genre
39
+ - departement
40
+ - frequence_deplacement
41
+ - etat_civil
42
+ - niveau_etudes
43
+ - role
44
+ - type_contrat
45
+
46
+ sat_cols:
47
+ - satisfaction_employee_environnement
48
+ - satisfaction_employee_nature_travail
49
+ - satisfaction_employee_equipe
50
+ - satisfaction_employee_equilibre_pro_perso
51
+
52
+ first_vars:
53
+ - age
54
+ - revenu_mensuel
55
+ - annees_dans_l_entreprise
56
+ - note_evaluation_actuelle
hf_space/hf_space/hf_space/scripts_projet04/brand/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/scripts_projet04/brand/brand.py ADDED
@@ -0,0 +1,713 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Palette et thèmes graphiques pour Matplotlib/Seaborn.
2
+
3
+ Ce module fournit une classe utilitaire (`Theme`) et une configuration
4
+ externe (`ThemeConfig`, `configure_theme`) permettant de définir des
5
+ couleurs, des palettes qualitatives et des cartes de couleurs (colormaps)
6
+ cohérentes. Des fonctions de démonstration et des wrappers
7
+ rétrocompatibles sont également fournis.
8
+ """
9
+
10
+ from pathlib import Path
11
+ from typing import Any, List, Literal, Mapping, Optional, Tuple, Union
12
+ from dataclasses import dataclass, field, fields
13
+
14
+ import seaborn as sns
15
+ import numpy as np # Données factices pour les démos
16
+ import matplotlib.pyplot as plt
17
+ import matplotlib.colors as mcolors
18
+
19
+
20
+ #
21
+ # Dataclass de configuration et gestion externe du thème
22
+ #
23
@dataclass
class ThemeConfig:
    """External theme configuration.

    Groups the main colors, palette variants and appearance options
    (background, rcParams). Pass an instance to :func:`configure_theme`.

    Attributes
    ----------
    primary, secondary, tertiary : str
        Main colors as hex strings (e.g. "#RRGGBB").
    background : str
        Background color for figures and axes.
    primary_variants, secondary_variants, tertiary_variants : list[str]
        Qualitative variants for multi-series plots.
    sequential_light, sequential_dark : dict | None
        Explicit overrides for the light/dark stops of the sequential
        colormaps.
    light_amount, dark_amount : float
        Blend coefficients used to lighten/darken when no override is
        supplied.
    text_color, axes_labelcolor, tick_color : str
        Colors for text, axis labels and ticks.
    figure_dpi, savefig_dpi : int
        Display and export resolutions.
    """

    # Main colors
    primary: str = "#7451EB"
    secondary: str = "#EE8273"
    tertiary: str = "#A6BD63"
    # Background color
    background: str = "#FFFCF2"
    # Qualitative variants
    primary_variants: List[str] = field(default_factory=lambda: ["#9D7EF0", "#4B25D6"])
    secondary_variants: List[str] = field(default_factory=lambda: ["#F3A093", "#D95848"])
    tertiary_variants: List[str] = field(default_factory=lambda: ["#BDD681", "#7E923F"])
    # Sequential stop overrides
    sequential_light: Optional[dict] = field(default_factory=lambda: {
        "primary": "#f3f0fd",
        "secondary": "#fdecea",
        "tertiary": "#f6faec",
    })
    sequential_dark: Optional[dict] = field(default_factory=lambda: {
        "primary": "#2f1577",
        "secondary": "#8b3025",
        "tertiary": "#4b5c27",
    })
    # Default blend coefficients
    light_amount: float = 0.85
    dark_amount: float = 0.65
    # Matplotlib rcParams
    text_color: str = "black"
    axes_labelcolor: str = "black"
    tick_color: str = "black"
    figure_dpi: int = 110
    savefig_dpi: int = 300
80
+
81
# Matplotlib rcParams installed by `Theme.apply()`.
# NOTE: `configure_theme()` updates this dict in place, so the literals
# below are only the import-time defaults.
THEME_RC_OVERRIDES = {
    "text.color": "black",
    "axes.labelcolor": "black",
    "xtick.color": "black",
    "ytick.color": "black",
    "figure.dpi": 110,
    "savefig.dpi": 300,
    "savefig.bbox": "tight",
    "svg.fonttype": "none",
    "figure.facecolor": "#FFFCF2",
    "axes.facecolor": "#FFFCF2",
}
94
+
95
+
96
def configure_theme(cfg: ThemeConfig) -> None:
    """Apply an **external** configuration to the theme.

    Updates the colors and palettes of :class:`Theme` and refreshes the
    matplotlib ``rcParams`` overrides (background, text colors and
    resolutions) that :meth:`Theme.apply` will install.

    Parameters
    ----------
    cfg : ThemeConfig
        Instance holding all theme options.
    """
    # Push the palette into the class-level state.
    Theme.configure(
        primary=cfg.primary,
        secondary=cfg.secondary,
        tertiary=cfg.tertiary,
        primary_variants=cfg.primary_variants,
        secondary_variants=cfg.secondary_variants,
        tertiary_variants=cfg.tertiary_variants,
        sequential_light=cfg.sequential_light,
        sequential_dark=cfg.sequential_dark,
        light_amount=cfg.light_amount,
        dark_amount=cfg.dark_amount,
    )
    Theme.BACKGROUND = cfg.background
    # Refresh the rcParams overrides in place; color-independent defaults
    # (bbox, svg font handling) are kept as-is.
    rc_updates = {
        "text.color": cfg.text_color,
        "axes.labelcolor": cfg.axes_labelcolor,
        "xtick.color": cfg.tick_color,
        "ytick.color": cfg.tick_color,
        "figure.dpi": cfg.figure_dpi,
        "savefig.dpi": cfg.savefig_dpi,
        "savefig.bbox": "tight",
        "svg.fonttype": "none",
        "figure.facecolor": cfg.background,
        "axes.facecolor": cfg.background,
    }
    THEME_RC_OVERRIDES.update(rc_updates)
136
+
137
+
138
def _config_from_mapping(data: Mapping[str, Any]) -> ThemeConfig:
    """Convert an arbitrary mapping into a :class:`ThemeConfig`.

    Unknown keys raise ``ValueError`` so typos in YAML files surface
    immediately instead of being silently ignored.
    """
    allowed = {f.name for f in fields(ThemeConfig)}
    extras = set(data) - allowed
    if extras:
        raise ValueError(
            "Clés inconnues dans la configuration du thème: "
            + ", ".join(sorted(extras))
        )
    return ThemeConfig(**{key: data[key] for key in allowed if key in data})
149
+
150
+
151
def load_brand(path: Union[str, Path]) -> ThemeConfig:
    """Load a YAML file and return a theme configuration.

    The file must contain keys matching the attributes of
    :class:`ThemeConfig`; missing keys keep the dataclass defaults.
    """

    # PyYAML is an optional dependency, imported lazily with a clear error.
    try:
        import yaml
    except ImportError as exc:  # pragma: no cover - optional dependency
        raise RuntimeError(
            "PyYAML est requis pour charger une charte graphique YAML. "
            "Installez le paquet 'pyyaml'."
        ) from exc

    yaml_path = Path(path).expanduser()
    if not yaml_path.exists():
        raise FileNotFoundError(f"Fichier YAML introuvable: {yaml_path}")

    # Empty files parse to None -> fall back to an empty mapping.
    parsed = yaml.safe_load(yaml_path.read_text(encoding="utf-8")) or {}

    if not isinstance(parsed, Mapping):
        raise ValueError(
            "Le contenu du YAML doit être un mapping clé/valeur (dict)."
        )

    return _config_from_mapping(parsed)
180
+
181
+
182
def configure_brand(path: Union[str, Path]) -> ThemeConfig:
    """Load a YAML brand file, apply it, and return the configuration."""
    config = load_brand(path)
    configure_theme(config)
    return config
188
+
189
+
190
class Theme:
    """Graphic theme for Matplotlib and Seaborn.

    The class provides:
    - main colors and an extended qualitative palette;
    - colormaps (sequential and diverging);
    - an :meth:`apply` method that installs the theme globally;
    - demo methods for a quick preview.

    Colors are not frozen: use :func:`configure_theme` to inject an
    external configuration.
    """

    # --- Main colors (configurable via Theme.configure) ---
    # Default values (overwritten by configure())
    PRIMARY: str = "#7451EB"  # purple (warm)
    SECONDARY: str = "#EE8273"  # coral (warm)
    TERTIARY: str = "#A6BD63"  # green (cool)
    BACKGROUND: str = "#FFFCF2"

    PALETTE: List[str] = ["#7451EB", "#EE8273", "#A6BD63"]

    @classmethod
    def base_palette(cls) -> List[str]:
        """Return the fundamental palette (PRIMARY, SECONDARY, TERTIARY)."""
        return [cls.PRIMARY, cls.SECONDARY, cls.TERTIARY]

    # Variants (qualitative palette) - configurable
    _PRIMARY_VARIANTS: List[str] = ["#9D7EF0", "#4B25D6"]
    _SECONDARY_VARIANTS: List[str] = ["#F3A093", "#D95848"]
    _TERTIARY_VARIANTS: List[str] = ["#BDD681", "#7E923F"]

    # Sequential colormaps (light -> color -> dark) - configurable.
    # NOTE: built once at class-creation time from the defaults above;
    # configure() recomputes this dict.
    _SEQUENTIALS = {
        "primary": ["#f3f0fd", PRIMARY, "#2f1577"],
        "secondary": ["#fdecea", SECONDARY, "#8b3025"],
        "tertiary": ["#f6faec", TERTIARY, "#4b5c27"],
    }

    # Valid color-role keys accepted throughout the API.
    _NAMES = {"primary", "secondary", "tertiary"}

    # --------- Dynamic configuration ---------
    @staticmethod
    def _to_rgb(color: str):
        # Convert any matplotlib color spec to an RGB ndarray in [0, 1].
        return np.array(mcolors.to_rgb(color))

    @classmethod
    def _tint(cls, color: str, amount: float = 0.85) -> str:
        """Return a lightened version of ``color``.

        The blend towards white is controlled by ``amount`` (0..1).
        """
        c = cls._to_rgb(color)
        white = np.array([1.0, 1.0, 1.0])
        mixed = (1 - amount) * c + amount * white
        return mcolors.to_hex(mixed)  # type: ignore

    @classmethod
    def _shade(cls, color: str, amount: float = 0.65) -> str:
        """Return a darkened version of ``color``.

        The blend towards black is controlled by ``amount`` (0..1).
        """
        c = cls._to_rgb(color)
        black = np.array([0.0, 0.0, 0.0])
        mixed = (1 - amount) * c + amount * black
        return mcolors.to_hex(mixed)  # type: ignore

    @classmethod
    def _compute_sequentials(
        cls,
        primary: str,
        secondary: str,
        tertiary: str,
        light_overrides: Optional[dict] = None,
        dark_overrides: Optional[dict] = None,
        light_amount: float = 0.85,
        dark_amount: float = 0.65,
    ) -> dict:
        """Build the [light, mid, dark] stops for each color.

        Parameters
        ----------
        primary, secondary, tertiary : str
            Main colors as hex strings.
        light_overrides, dark_overrides : dict | None
            Explicit replacements for the light/dark tints.
        light_amount, dark_amount : float
            Blend coefficients used when no override is provided.

        Returns
        -------
        dict
            Mapping {name: [light, mid, dark]}.
        """
        light_overrides = light_overrides or {}
        dark_overrides = dark_overrides or {}
        base = {
            "primary": primary,
            "secondary": secondary,
            "tertiary": tertiary,
        }
        seq = {}
        for k, mid in base.items():
            # An explicit override wins; otherwise derive by blending.
            light = light_overrides.get(k) or cls._tint(mid, amount=light_amount)
            dark = dark_overrides.get(k) or cls._shade(mid, amount=dark_amount)
            seq[k] = [light, mid, dark]
        return seq

    @classmethod
    def configure(
        cls,
        *,
        primary: Optional[str] = None,
        secondary: Optional[str] = None,
        tertiary: Optional[str] = None,
        primary_variants: Optional[List[str]] = None,
        secondary_variants: Optional[List[str]] = None,
        tertiary_variants: Optional[List[str]] = None,
        sequential_light: Optional[dict] = None,
        sequential_dark: Optional[dict] = None,
        light_amount: float = 0.85,
        dark_amount: float = 0.65,
    ) -> None:
        """Dynamically update the class colors and colormaps.

        Examples
        --------
        >>> Theme.configure(primary="#0072CE", secondary="#FF6A00")
        >>> Theme.configure(
        ...     primary="#1f77b4",
        ...     sequential_light={"primary": "#eef5fb"},
        ...     sequential_dark={"primary": "#0b3050"},
        ... )

        Parameters
        ----------
        primary, secondary, tertiary : str | None
            Main colors.
        primary_variants, secondary_variants, tertiary_variants : list[str] | None
            Qualitative variants.
        sequential_light, sequential_dark : dict | None
            Overrides for the light/dark tints.
        light_amount, dark_amount : float
            Default blend coefficients.
        """
        if primary:
            cls.PRIMARY = primary
        if secondary:
            cls.SECONDARY = secondary
        if tertiary:
            cls.TERTIARY = tertiary

        if primary_variants is not None:
            cls._PRIMARY_VARIANTS = primary_variants
        if secondary_variants is not None:
            cls._SECONDARY_VARIANTS = secondary_variants
        if tertiary_variants is not None:
            cls._TERTIARY_VARIANTS = tertiary_variants

        # Recompute the sequential ramps (with optional overrides)
        cls._SEQUENTIALS = cls._compute_sequentials(
            cls.PRIMARY,
            cls.SECONDARY,
            cls.TERTIARY,
            light_overrides=sequential_light,
            dark_overrides=sequential_dark,
            light_amount=light_amount,
            dark_amount=dark_amount,
        )
        cls.PALETTE = cls.base_palette()

    # --------- internal helpers ---------
    @classmethod
    def _get_seq(cls, key: str) -> List[str]:
        """Return the sequential ramp associated with ``key``.

        Raises ``ValueError`` when the key is unknown.
        """
        key = key.lower()
        if key not in cls._NAMES:
            raise ValueError(f"Couleur inconnue: {key}. Choisir parmi {sorted(cls._NAMES)}.")
        return cls._SEQUENTIALS[key]

    @staticmethod
    def _from_list(name: str, colors: List[str]) -> mcolors.LinearSegmentedColormap:
        """Create a ``LinearSegmentedColormap`` from a list of color stops."""
        return mcolors.LinearSegmentedColormap.from_list(name, colors)

    @classmethod
    def _make_diverging(
        cls,
        start_key: str,
        end_key: str,
        *,
        center: Optional[str] = None,
        strong_ends: bool = True,
        blend_center: bool = False,
        blend_ratio: float = 0.5,
    ) -> Tuple[str, List[str]]:
        """Build a diverging colormap from two sequential ramps.

        Generated stops:
        ``[dark_start?, start_mid, center, end_mid, dark_end?]``
        """
        s_seq = cls._get_seq(start_key)  # [light, mid, dark]
        e_seq = cls._get_seq(end_key)  # [light, mid, dark]

        # Center is either a blend of the two mids, an explicit color, or
        # a neutral gray.
        if blend_center:
            center_color = mix_colors(s_seq[1], e_seq[1], ratio=blend_ratio)
        else:
            center_color = center or "#f7f7f7"

        colors: List[str] = []
        if strong_ends:
            colors.append(s_seq[2])  # dark start
        colors.append(s_seq[1])  # start mid
        colors.append(center_color)  # neutral or blended center
        colors.append(e_seq[1])  # end mid
        if strong_ends:
            colors.append(e_seq[2])  # dark end

        name = f"ocr_div_{start_key}_{end_key}"
        return name, colors

    # --------- public API ---------
    @classmethod
    def colormap(
        cls,
        mode: Literal["primary", "secondary", "tertiary", "sequential", "diverging"] = "primary",
        *,
        start: Optional[Literal["primary", "secondary", "tertiary"]] = None,
        end: Optional[Literal["primary", "secondary", "tertiary"]] = None,
        reverse: bool = False,
        as_cmap: bool = True,
        center: Optional[str] = None,
        blend_center: bool = False,
        blend_ratio: float = 0.5,
        strong_ends: bool = True,
    ):
        """Return a Matplotlib colormap or the list of its stops.

        Usage
        -----
        Sequential around one color:
            colormap("primary")
            colormap("sequential", start="primary")
        Diverging between two colors:
            colormap("diverging", start="primary", end="tertiary")

        Parameters
        ----------
        mode : {"primary", "secondary", "tertiary", "sequential", "diverging"}
            Kind of colormap requested.
        start, end : {"primary", "secondary", "tertiary"} | None
            Start/end colors (depending on mode).
        reverse : bool
            Reverse the color order.
        as_cmap : bool
            If ``True``, return a ``Colormap`` object; otherwise the list
            of hex values.
        center : str | None
            Explicit center color (hex). Ignored when ``blend_center`` is ``True``.
        blend_center : bool
            Automatically blend the ``start`` and ``end`` mids to build the center color.
        blend_ratio : float
            Blend ratio (0..1) used when ``blend_center`` is enabled.
        strong_ends : bool
            Append the dark tints of each ramp at the ends of the diverging colormap.
        """
        # Compat alias: "primary"/"secondary"/"tertiary" => sequential
        if mode in {"primary", "secondary", "tertiary"}:
            seq = cls._get_seq(mode)
            colors = list(reversed(seq)) if reverse else seq
            return cls._from_list(f"ocr_{mode}", colors) if as_cmap else colors

        # NOTE(review): case normalization of `mode` was disabled here;
        # callers are expected to pass the lowercase Literal values.
        if mode == "sequential":
            key = (start or "primary").lower()
            seq = cls._get_seq(key)
            colors = list(reversed(seq)) if reverse else seq
            return cls._from_list(f"ocr_seq_{key}", colors) if as_cmap else colors

        if mode == "diverging":
            if not start or not end:
                raise ValueError("Pour un colormap diverging, fournir start=... et end=...")
            # NOTE(review): start/end are not lowercased (disabled code was
            # removed); they must already match the Literal values.
            if start not in cls._NAMES or end not in cls._NAMES:
                raise ValueError(f"start/end doivent ĂŞtre dans {sorted(cls._NAMES)}.")
            name, colors = cls._make_diverging(
                start,
                end,
                center=center,
                strong_ends=strong_ends,
                blend_center=blend_center,
                blend_ratio=blend_ratio,
            )
            if reverse:
                colors = list(reversed(colors))
            return cls._from_list(name, colors) if as_cmap else colors

        raise ValueError("mode inconnu. Utiliser 'primary'/'secondary'/'tertiary' ou 'sequential'/'diverging'.")

    @classmethod
    def apply(cls, *, context: str = "notebook", style: str = "white") -> List[str]:
        """Install the global Seaborn/Matplotlib theme.

        Returns the extended qualitative palette handed to Seaborn.
        """
        pal = cls.extended_palette()
        sns.set_theme(context=context, style=style, palette=pal)  # type: ignore
        plt.rcParams.update(THEME_RC_OVERRIDES)
        return pal

    @classmethod
    def extended_palette(cls) -> List[str]:
        """Return the extended qualitative palette.

        Useful for multi-series charts (bars, lines, etc.).
        """
        return [
            cls.PRIMARY, *cls._PRIMARY_VARIANTS,
            cls.SECONDARY, *cls._SECONDARY_VARIANTS,
            cls.TERTIARY, *cls._TERTIARY_VARIANTS,
        ]

    # --------- Callable demos ---------
    @staticmethod
    def _demo_field(n: int = 300):
        """Generate a smooth 2D field intended for ``imshow``."""
        x = np.linspace(-3, 3, n)
        y = np.linspace(-3, 3, n)
        X, Y = np.meshgrid(x, y)
        Z = np.sin(X) * np.cos(Y)
        return X, Y, Z

    @staticmethod
    def _demo_matrix(shape: Tuple[int, int] = (10, 12), seed: int = 0):
        """Generate a random matrix for reproducible heatmaps."""
        rng = np.random.default_rng(seed)
        return rng.standard_normal(shape)

    @classmethod
    def demo_imshow_sequential(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        reverse: bool = False,
        with_colorbar: bool = True,
        title: Optional[str] = None,
        apply_theme: bool = False,
    ) -> None:
        """Show an ``imshow`` demo using a sequential colormap.

        Example
        -------
        >>> Theme.demo_imshow_sequential(start="tertiary", reverse=True)
        """
        if apply_theme:
            cls.apply()
        _, _, Z = cls._demo_field()
        cmap = cls.colormap("sequential", start=start, reverse=reverse)
        plt.imshow(Z, cmap=cmap, origin="lower")  # type: ignore
        direction = "foncé → clair" if reverse else "clair → foncé"
        plt.title(title or f"Séquentiel {start.upper()} ({direction})")
        if with_colorbar:
            plt.colorbar()
        plt.show()

    @classmethod
    def demo_imshow_diverging(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        end: Literal["primary", "secondary", "tertiary"] = "secondary",
        reverse: bool = False,
        with_colorbar: bool = True,
        title: Optional[str] = None,
        apply_theme: bool = False,
        center: Optional[str] = None,
        blend_center: bool = False,
        blend_ratio: float = 0.5,
        strong_ends: bool = True,
    ) -> None:
        """Show an ``imshow`` demo using a diverging colormap.

        Example
        -------
        >>> Theme.demo_imshow_diverging(start="primary", end="secondary")
        """
        if apply_theme:
            cls.apply()
        _, _, Z = cls._demo_field()
        cmap = cls.colormap(
            "diverging",
            start=start,
            end=end,
            reverse=reverse,
            center=center,
            blend_center=blend_center,
            blend_ratio=blend_ratio,
            strong_ends=strong_ends,
        )
        plt.imshow(Z, cmap=cmap, origin="lower")  # type: ignore
        plt.title(title or f"Diverging {start.upper()} ↔ {end.upper()}")
        if with_colorbar:
            plt.colorbar()
        plt.show()

    @classmethod
    def demo_heatmap_sequential(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        reverse: bool = False,
        title: Optional[str] = None,
        apply_theme: bool = True,
    ) -> None:
        """Show a Seaborn heatmap in sequential mode.

        Example
        -------
        >>> Theme.demo_heatmap_sequential(start="primary")
        """
        if apply_theme:
            cls.apply()
        data = cls._demo_matrix()
        plt.figure(figsize=(6, 4))
        sns.heatmap(data, cmap=cls.colormap("sequential", start=start, reverse=reverse))  # type: ignore
        direction = "foncé → clair" if reverse else "clair → foncé"
        plt.title(title or f"Heatmap séquentielle - {start.upper()} ({direction})")
        plt.show()

    @classmethod
    def demo_heatmap_diverging(
        cls,
        *,
        start: Literal["primary", "secondary", "tertiary"] = "primary",
        end: Literal["primary", "secondary", "tertiary"] = "tertiary",
        reverse: bool = False,
        title: Optional[str] = None,
        apply_theme: bool = True,
    ) -> None:
        """Show a Seaborn heatmap in diverging mode.

        Example
        -------
        >>> Theme.demo_heatmap_diverging(start="primary", end="tertiary")
        """
        if apply_theme:
            cls.apply()
        data = cls._demo_matrix()
        plt.figure(figsize=(6, 4))
        sns.heatmap(data, cmap=cls.colormap("diverging", start=start, end=end, reverse=reverse))  # type: ignore
        plt.title(title or f"Heatmap diverging - {start.upper()} ↔ {end.upper()}")
        plt.show()
648
+
649
+
650
+
651
+ # API fonctionnelle (compatibilité)
652
+
653
def set_theme():
    """Apply the OC theme and return the extended palette.

    Backward-compatible shortcut for :meth:`Theme.apply`.
    """
    return Theme.apply()
659
+
660
+
661
def set_colormap(
    mode: Literal["primary", "secondary", "tertiary", "sequential", "diverging"] = "primary",
    *,
    start: Optional[Literal["primary", "secondary", "tertiary"]] = None,
    end: Optional[Literal["primary", "secondary", "tertiary"]] = None,
    reverse: bool = False,
    as_cmap: bool = True,
):
    """Shortcut for obtaining an OC colormap.

    See :meth:`Theme.colormap` for the parameter details.
    """
    return Theme.colormap(
        mode,
        start=start,
        end=end,
        reverse=reverse,
        as_cmap=as_cmap,
    )
674
+
675
# Default configuration (kept outside the class), applied at import time so
# the module is usable without an explicit configure_theme() call.
_default_cfg = ThemeConfig(
    primary="#7451EB",
    secondary="#EE8273",
    tertiary="#A6BD63",
    background="#FFFCF2",
    primary_variants=["#9D7EF0", "#4B25D6"],
    secondary_variants=["#F3A093", "#D95848"],
    tertiary_variants=["#BDD681", "#7E923F"],
    sequential_light={
        "primary": "#f3f0fd",
        "secondary": "#fdecea",
        "tertiary": "#f6faec",
    },
    sequential_dark={
        "primary": "#2f1577",
        "secondary": "#8b3025",
        "tertiary": "#4b5c27",
    },
    text_color="black",
)
configure_theme(_default_cfg)
697
def mix_colors(color1: str, color2: str, ratio: float = 0.5) -> str:
    """Blend two hex colors according to ``ratio`` (0-1).

    ``ratio=0`` yields ``color1``; ``ratio=1`` yields ``color2``.
    """
    first = np.array(mcolors.to_rgb(color1))
    second = np.array(mcolors.to_rgb(color2))
    blended = (1 - ratio) * first + ratio * second
    return mcolors.to_hex(blended)  # type: ignore
703
+
704
+
705
def make_diverging_cmap(
    primary: str,
    secondary: str,
    name: str = "custom_diverging",
    ratio: float = 0.5,
):
    """Create a simple diverging colormap (primary -> blend -> secondary)."""
    midpoint = mix_colors(primary, secondary, ratio=ratio)
    stops = [primary, midpoint, secondary]
    return mcolors.LinearSegmentedColormap.from_list(name, stops)