GitHub Actions commited on
Commit
438eb35
·
1 Parent(s): cdef3b2

🚀 Auto-deploy from GitHub Actions

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .github/workflows/deploy.yml +7 -3
  2. .github/workflows/static.yml +37 -0
  3. .gitignore +5 -12
  4. README.md +13 -10
  5. app.py +35 -1
  6. hf_space/hf_space/.gitignore +5 -0
  7. hf_space/hf_space/README.md +7 -27
  8. hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +2 -2
  9. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +34 -18
  10. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE +10 -0
  11. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile +85 -0
  12. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +178 -4
  13. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +7 -4
  14. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +191 -1
  15. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +328 -0
  16. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py +0 -0
  17. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py +7 -0
  18. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml +37 -0
  19. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
  20. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore +2 -0
  21. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
  22. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +7 -0
  23. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py +17 -0
  24. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep +0 -0
  25. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  26. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock +0 -0
  27. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml +2 -0
  28. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +1 -0
  29. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py +32 -0
  30. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +29 -0
  31. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py +29 -0
  32. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py +0 -0
  33. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +30 -0
  34. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +30 -0
  35. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py +29 -0
  36. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml +53 -0
  37. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep +0 -0
  38. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep +0 -0
  39. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep +0 -0
  40. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py +5 -0
  41. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb +0 -0
  42. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py +3 -0
  43. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py +52 -0
  44. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py +188 -14
  45. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/explainability.py +102 -0
  46. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py +156 -14
  47. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py +84 -14
  48. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py +328 -15
  49. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/settings.py +114 -0
  50. hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/settings.yml +56 -0
.github/workflows/deploy.yml CHANGED
@@ -1,4 +1,4 @@
1
- name: Deploy to Hugging Face Spaces
2
 
3
  on:
4
  push:
@@ -19,12 +19,16 @@ jobs:
19
  - name: Setup Python
20
  uses: actions/setup-python@v5
21
  with:
22
- python-version: "3.10"
23
 
24
  - name: Install dependencies
25
  run: |
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
 
 
 
 
28
 
29
  - name: Deploy to Hugging Face Space
30
  env:
@@ -33,7 +37,7 @@ jobs:
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
- rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
 
1
+ name: Deploiement vers Hugging Face Spaces
2
 
3
  on:
4
  push:
 
19
  - name: Setup Python
20
  uses: actions/setup-python@v5
21
  with:
22
+ python-version: "3.11"
23
 
24
  - name: Install dependencies
25
  run: |
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28
+ pip install -e .
29
+
30
+ - name: Préparer les données et le modèle
31
+ run: python main.py
32
 
33
  - name: Deploy to Hugging Face Space
34
  env:
 
37
  git config --global user.email "actions@github.com"
38
  git config --global user.name "GitHub Actions"
39
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
40
+ rsync -av --exclude '.git' --exclude 'docs' ./ hf_space/
41
  cd hf_space
42
  git add .
43
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
.github/workflows/static.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploiement de la documentation
2
+
3
+ on:
4
+ push:
5
+ branches: ["main"]
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+ pages: write
11
+ id-token: write
12
+
13
+ concurrency:
14
+ group: "pages"
15
+ cancel-in-progress: false
16
+
17
+ jobs:
18
+ deploy:
19
+ environment:
20
+ name: github-pages
21
+ url: ${{ steps.deployment.outputs.page_url }}
22
+ runs-on: ubuntu-latest
23
+ steps:
24
+ - name: Checkout
25
+ uses: actions/checkout@v4
26
+
27
+ - name: Setup Pages
28
+ uses: actions/configure-pages@v5
29
+
30
+ - name: Upload artifact
31
+ uses: actions/upload-pages-artifact@v3
32
+ with:
33
+ path: 'docs/site'
34
+
35
+ - name: Deploy to GitHub Pages
36
+ id: deployment
37
+ uses: actions/deploy-pages@v4
.gitignore CHANGED
@@ -1,19 +1,18 @@
1
- # Data
2
- /data/
3
-
4
- # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
7
  *.pdf
8
  /output/
9
  questions.md
10
- *.pdf
11
-
 
12
 
13
  # vim
14
  *.swp
15
  *.swo
16
 
 
17
  ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
18
 
19
  # Byte-compiled / optimized / DLL files
@@ -86,9 +85,6 @@ instance/
86
  # Scrapy stuff:
87
  .scrapy
88
 
89
- # MkDocs documentation
90
- docs/site/
91
-
92
  # PyBuilder
93
  .pybuilder/
94
  target/
@@ -166,9 +162,6 @@ venv.bak/
166
  # Rope project settings
167
  .ropeproject
168
 
169
- # mkdocs documentation
170
- /site
171
-
172
  # mypy
173
  .mypy_cache/
174
  .dmypy.json
 
1
+ # Spécifique à ce projet
 
 
 
2
  .DS_Store
3
  *.code-workspace
4
  *.pdf
5
  /output/
6
  questions.md
7
+ /reports/
8
+ /data/
9
+ runtime.txt
10
 
11
  # vim
12
  *.swp
13
  *.swo
14
 
15
+
16
  ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
17
 
18
  # Byte-compiled / optimized / DLL files
 
85
  # Scrapy stuff:
86
  .scrapy
87
 
 
 
 
88
  # PyBuilder
89
  .pybuilder/
90
  target/
 
162
  # Rope project settings
163
  .ropeproject
164
 
 
 
 
165
  # mypy
166
  .mypy_cache/
167
  .dmypy.json
README.md CHANGED
@@ -10,13 +10,14 @@ pinned: true
10
  short_description: Projet 05 formation Openclassrooms
11
  ---
12
 
13
- # projet_05
14
 
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
18
 
19
- Déployez un modèle de Machine Learning
 
20
 
21
  ## Organisation du projet
22
 
@@ -305,10 +306,6 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
305
  [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
306
  [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
307
  [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
308
- [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
309
- [license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
310
- [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
311
- [linkedin-url]: https://linkedin.com/in/stephanemanet
312
  [product-screenshot]: images/screenshot.png
313
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
314
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
@@ -328,9 +325,15 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
328
  [Bootstrap-url]: https://getbootstrap.com
329
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
330
  [JQuery-url]: https://jquery.com
 
 
 
 
 
331
  <!-- TODO: -->
332
- [Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
333
- [Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
334
- [MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
 
335
  [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
336
- [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
 
10
  short_description: Projet 05 formation Openclassrooms
11
  ---
12
 
13
+ # projet_05 : Déployez un modèle de Machine Learning
14
 
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
18
 
19
+ [![mkdocs-shield]][mkdocs-url]
20
+
21
 
22
  ## Organisation du projet
23
 
 
306
  [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
307
  [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
308
  [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
 
 
 
 
309
  [product-screenshot]: images/screenshot.png
310
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
311
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
 
325
  [Bootstrap-url]: https://getbootstrap.com
326
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
327
  [JQuery-url]: https://jquery.com
328
+ <!-- OK -->
329
+ [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
330
+ [license-url]: https://github.com/stephmnt/OCR_Projet05/blob/main/LICENSE
331
+ [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
332
+ [linkedin-url]: https://linkedin.com/in/stephanemanet
333
  <!-- TODO: -->
334
+ [postgres-shield]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
335
+ [python-shield]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
336
+ [mkdocs-shield]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
337
+ [mkdocs-url]: https://stephmnt.github.io/OCR_Projet05/
338
  [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
339
+ [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
app.py CHANGED
@@ -17,12 +17,30 @@ SCHEMA_PATH = Path("data/processed/schema.json")
17
 
18
 
19
  def _load_schema(path: Path) -> dict[str, Any]:
 
 
 
 
 
 
 
 
20
  if not path.exists():
21
  return {}
22
  return json.loads(path.read_text(encoding="utf-8"))
23
 
24
 
25
  def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
 
 
 
 
 
 
 
 
 
 
26
  if schema:
27
  candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
28
  if candidates:
@@ -37,6 +55,18 @@ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
37
 
38
 
39
  def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
40
  if isinstance(payload, pd.DataFrame):
41
  df = payload.copy()
42
  elif payload is None:
@@ -50,6 +80,7 @@ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
50
 
51
 
52
  def _ensure_model():
 
53
  if PIPELINE is None:
54
  raise gr.Error(
55
  "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
@@ -57,6 +88,7 @@ def _ensure_model():
57
 
58
 
59
  def score_table(table):
 
60
  _ensure_model()
61
  df = _convert_input(table, FEATURE_ORDER)
62
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
@@ -70,6 +102,7 @@ def score_table(table):
70
 
71
 
72
  def score_csv(upload):
 
73
  _ensure_model()
74
  if upload is None:
75
  raise gr.Error("Veuillez déposer un fichier CSV.")
@@ -85,6 +118,7 @@ def score_csv(upload):
85
 
86
 
87
  def predict_from_form(*values):
 
88
  _ensure_model()
89
  if not FEATURE_ORDER:
90
  raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
@@ -132,7 +166,7 @@ with gr.Blocks(title="Prédicteur d'attrition") as demo:
132
 
133
  if PIPELINE is None:
134
  gr.Markdown(
135
- "⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
136
  )
137
  else:
138
  gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
 
17
 
18
 
19
  def _load_schema(path: Path) -> dict[str, Any]:
20
+ """Load the schema definition stored as JSON.
21
+
22
+ Args:
23
+ path: Path to the schema.json file.
24
+
25
+ Returns:
26
+ A dictionary describing the schema or an empty dict if the file is missing.
27
+ """
28
  if not path.exists():
29
  return {}
30
  return json.loads(path.read_text(encoding="utf-8"))
31
 
32
 
33
  def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
34
+ """Infer the ordered list of features expected by the model.
35
+
36
+ Args:
37
+ metadata: Metadata produced during training.
38
+ schema: Schema derived from `features.py`.
39
+ pipeline: Loaded sklearn pipeline (optional).
40
+
41
+ Returns:
42
+ List of feature names in the order expected by the model.
43
+ """
44
  if schema:
45
  candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
46
  if candidates:
 
55
 
56
 
57
  def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
58
+ """Normalize any user input into a validated DataFrame.
59
+
60
+ Args:
61
+ payload: Raw table coming from Gradio (DataFrame, list, etc.).
62
+ headers: Expected column names.
63
+
64
+ Returns:
65
+ A sanitized DataFrame.
66
+
67
+ Raises:
68
+ gr.Error: If no valid row is provided.
69
+ """
70
  if isinstance(payload, pd.DataFrame):
71
  df = payload.copy()
72
  elif payload is None:
 
80
 
81
 
82
  def _ensure_model():
83
+ """Ensure that a pipeline has been loaded before inference."""
84
  if PIPELINE is None:
85
  raise gr.Error(
86
  "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
 
88
 
89
 
90
  def score_table(table):
91
+ """Score data entered via the interactive table."""
92
  _ensure_model()
93
  df = _convert_input(table, FEATURE_ORDER)
94
  drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
 
102
 
103
 
104
  def score_csv(upload):
105
+ """Score a CSV uploaded by the user."""
106
  _ensure_model()
107
  if upload is None:
108
  raise gr.Error("Veuillez déposer un fichier CSV.")
 
118
 
119
 
120
  def predict_from_form(*values):
121
+ """Score a single row coming from the form tab."""
122
  _ensure_model()
123
  if not FEATURE_ORDER:
124
  raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
 
166
 
167
  if PIPELINE is None:
168
  gr.Markdown(
169
+ "**Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
170
  )
171
  else:
172
  gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
hf_space/hf_space/.gitignore CHANGED
@@ -4,6 +4,11 @@
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
 
 
 
 
 
7
 
8
  # vim
9
  *.swp
 
4
  # Mac OS-specific storage files
5
  .DS_Store
6
  *.code-workspace
7
+ *.pdf
8
+ /output/
9
+ questions.md
10
+ *.pdf
11
+
12
 
13
  # vim
14
  *.swp
hf_space/hf_space/README.md CHANGED
@@ -1,5 +1,3 @@
1
- # projet_05
2
-
3
  ---
4
  title: OCR_Projet05
5
  emoji: 🔥
@@ -12,6 +10,8 @@ pinned: true
12
  short_description: Projet 05 formation Openclassrooms
13
  ---
14
 
 
 
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
@@ -76,17 +76,6 @@ Déployez un modèle de Machine Learning
76
 
77
  --------
78
 
79
- ---
80
- title: Projet 05
81
- emoji: 👀
82
- colorFrom: indigo
83
- colorTo: green
84
- sdk: gradio
85
- sdk_version: 5.49.1
86
- app_file: app.py
87
- pinned: false
88
- ---
89
-
90
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
91
 
92
  <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
@@ -99,8 +88,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
99
  *** Thanks again! Now go create something AMAZING! :D
100
  -->
101
 
102
-
103
-
104
  <!-- PROJECT SHIELDS -->
105
  <!--
106
  *** I'm using markdown "reference style" links for readability.
@@ -118,8 +105,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
118
  [![LinkedIn][linkedin-shield]][linkedin-url]
119
  ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
120
 
121
-
122
-
123
  <!-- PROJECT LOGO -->
124
  <br />
125
  <div align="center">
@@ -143,8 +128,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
143
  </p>
144
  </div>
145
 
146
-
147
-
148
  <!-- TABLE OF CONTENTS -->
149
  <details>
150
  <summary>Table of Contents</summary>
@@ -191,8 +174,6 @@ Here's a blank template to get started. To avoid retyping too much info, do a se
191
 
192
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
193
 
194
-
195
-
196
  <!-- GETTING STARTED -->
197
  ## Getting Started
198
 
@@ -212,20 +193,19 @@ This is an example of how to list things you need to use the software and how to
212
  pip install -r requirements.txt
213
  uvicorn app.main:app --reload
214
 
215
- 1. Get a free API Key at [https://example.com](https://example.com)
216
- 2. Clone the repo
217
  ```sh
218
- git clone https://github.com/github_username/repo_name.git
219
  ```
220
- 3. Install NPM packages
221
  ```sh
222
  npm install
223
  ```
224
- 4. Enter your API in `config.js`
225
  ```js
226
  const API_KEY = 'ENTER YOUR API';
227
  ```
228
- 5. Change git remote url to avoid accidental pushes to base project
229
  ```sh
230
  git remote set-url origin github_username/repo_name
231
  git remote -v # confirm the changes
 
 
 
1
  ---
2
  title: OCR_Projet05
3
  emoji: 🔥
 
10
  short_description: Projet 05 formation Openclassrooms
11
  ---
12
 
13
+ # projet_05
14
+
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
 
76
 
77
  --------
78
 
 
 
 
 
 
 
 
 
 
 
 
79
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
80
 
81
  <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
 
88
  *** Thanks again! Now go create something AMAZING! :D
89
  -->
90
 
 
 
91
  <!-- PROJECT SHIELDS -->
92
  <!--
93
  *** I'm using markdown "reference style" links for readability.
 
105
  [![LinkedIn][linkedin-shield]][linkedin-url]
106
  ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
107
 
 
 
108
  <!-- PROJECT LOGO -->
109
  <br />
110
  <div align="center">
 
128
  </p>
129
  </div>
130
 
 
 
131
  <!-- TABLE OF CONTENTS -->
132
  <details>
133
  <summary>Table of Contents</summary>
 
174
 
175
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
176
 
 
 
177
  <!-- GETTING STARTED -->
178
  ## Getting Started
179
 
 
193
  pip install -r requirements.txt
194
  uvicorn app.main:app --reload
195
 
196
+ 1. Clone the repo
 
197
  ```sh
198
+ git clone https://github.com/stephmnt/OCR_Projet05.git
199
  ```
200
+ 2. Install NPM packages
201
  ```sh
202
  npm install
203
  ```
204
+ 3. Enter your API in `config.js`
205
  ```js
206
  const API_KEY = 'ENTER YOUR API';
207
  ```
208
+ 4. Change git remote url to avoid accidental pushes to base project
209
  ```sh
210
  git remote set-url origin github_username/repo_name
211
  git remote -v # confirm the changes
hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -33,8 +33,8 @@ jobs:
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
- rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
33
  git config --global user.email "actions@github.com"
34
  git config --global user.name "GitHub Actions"
35
  git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
36
+ rsync -av --exclude '.git' --exclude 'output/' --exclude 'models/' ./ hf_space/
37
  cd hf_space
38
  git add .
39
  git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,5 +1,17 @@
1
  # projet_05
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
  </a>
@@ -57,6 +69,11 @@ Déployez un modèle de Machine Learning
57
  └── plots.py <- Code to create visualizations
58
  ```
59
 
 
 
 
 
 
60
  --------
61
 
62
  ---
@@ -93,6 +110,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
93
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
  -->
95
  [![Contributors][contributors-shield]][contributors-url]
 
96
  [![Forks][forks-shield]][forks-url]
97
  [![Stargazers][stars-shield]][stars-url]
98
  [![Issues][issues-shield]][issues-url]
@@ -236,7 +254,7 @@ _For more examples, please refer to the [Documentation](https://example.com)_
236
  - [ ] Feature 3
237
  - [ ] Nested Feature
238
 
239
- See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
 
241
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
 
@@ -299,18 +317,18 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
299
 
300
  <!-- MARKDOWN LINKS & IMAGES -->
301
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
- [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
- [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
- [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
- [forks-url]: https://github.com/github_username/repo_name/network/members
306
- [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
- [stars-url]: https://github.com/github_username/repo_name/stargazers
308
- [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
- [issues-url]: https://github.com/github_username/repo_name/issues
310
- [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
- [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
- [linkedin-url]: https://linkedin.com/in/linkedin_username
314
  [product-screenshot]: images/screenshot.png
315
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
@@ -331,10 +349,8 @@ Project Link: [https://github.com/github_username/repo_name](https://github.com/
331
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
  [JQuery-url]: https://jquery.com
333
  <!-- TODO: -->
334
- [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
- [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
- [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
- [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
- [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
- [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)[text](../projet_04/.gitignore)
 
1
  # projet_05
2
 
3
+ ---
4
+ title: OCR_Projet05
5
+ emoji: 🔥
6
+ colorFrom: purple
7
+ colorTo: purple
8
+ sdk: gradio
9
+ sdk_version: 5.49.1
10
+ app_file: app.py
11
+ pinned: true
12
+ short_description: Projet 05 formation Openclassrooms
13
+ ---
14
+
15
  <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
16
  <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
17
  </a>
 
69
  └── plots.py <- Code to create visualizations
70
  ```
71
 
72
+ ## Code hérité réutilisé
73
+
74
+ - `scripts_projet04/brand` : charte graphique OpenClassrooms (classe `Theme`, palettes, YAML). Le module `projet_05/branding.py` en est la porte d'entrée et applique automatiquement le thème.
75
+ - `scripts_projet04/manet_projet04/shap_generator.py` : fonctions `shap_global` / `shap_local` utilisées par `projet_05/modeling/train.py` pour reproduire les visualisations SHAP.
76
+
77
  --------
78
 
79
  ---
 
110
  *** https://www.markdownguide.org/basic-syntax/#reference-style-links
111
  -->
112
  [![Contributors][contributors-shield]][contributors-url]
113
+ [![Python][python]][python]
114
  [![Forks][forks-shield]][forks-url]
115
  [![Stargazers][stars-shield]][stars-url]
116
  [![Issues][issues-shield]][issues-url]
 
254
  - [ ] Feature 3
255
  - [ ] Nested Feature
256
 
257
+ See the [open issues](https://github.com/stephmnt/OCR_projet05/issues) for a full list of proposed features (and known issues).
258
 
259
  <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
 
 
317
 
318
  <!-- MARKDOWN LINKS & IMAGES -->
319
  <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
320
+ [contributors-shield]: https://img.shields.io/github/contributors/stephmnt/OCR_projet05.svg?style=for-the-badge
321
+ [contributors-url]: https://github.com/stephmnt/OCR_projet05/graphs/contributors
322
+ [forks-shield]: https://img.shields.io/github/forks/stephmnt/OCR_projet05.svg?style=for-the-badge
323
+ [forks-url]: https://github.com/stephmnt/OCR_projet05/network/members
324
+ [stars-shield]: https://img.shields.io/github/stars/stephmnt/OCR_projet05.svg?style=for-the-badge
325
+ [stars-url]: https://github.com/stephmnt/OCR_projet05/stargazers
326
+ [issues-shield]: https://img.shields.io/github/issues/stephmnt/OCR_projet05.svg?style=for-the-badge
327
+ [issues-url]: https://github.com/stephmnt/OCR_projet05/issues
328
+ [license-shield]: https://img.shields.io/github/license/stephmnt/OCR_projet05.svg?style=for-the-badge
329
+ [license-url]: https://github.com/stephmnt/OCR_projet05/blob/master/LICENSE.txt
330
  [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
331
+ [linkedin-url]: https://linkedin.com/in/stephanemanet
332
  [product-screenshot]: images/screenshot.png
333
  [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
334
  <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
 
349
  [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
350
  [JQuery-url]: https://jquery.com
351
  <!-- TODO: -->
352
+ [Postgres]: https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white
353
+ [Python]: https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)
354
+ [MkDocs]: https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff
355
+ [NumPy]: https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff
 
356
  [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/LICENSE ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ The MIT License (MIT)
3
+ Copyright (c) 2025, Stéphane Manet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10
+
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Makefile ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #################################################################################
2
+ # GLOBALS #
3
+ #################################################################################
4
+
5
+ PROJECT_NAME = OCR_projet05
6
+ PYTHON_VERSION = 3.10
7
+ PYTHON_INTERPRETER = python
8
+
9
+ #################################################################################
10
+ # COMMANDS #
11
+ #################################################################################
12
+
13
+
14
+ ## Install Python dependencies
15
+ .PHONY: requirements
16
+ requirements:
17
+ pip install -e .
18
+
19
+
20
+
21
+
22
+ ## Delete all compiled Python files
23
+ .PHONY: clean
24
+ clean:
25
+ find . -type f -name "*.py[co]" -delete
26
+ find . -type d -name "__pycache__" -delete
27
+
28
+
29
+ ## Lint using ruff (use `make format` to do formatting)
30
+ .PHONY: lint
31
+ lint:
32
+ ruff format --check
33
+ ruff check
34
+
35
+ ## Format source code with ruff
36
+ .PHONY: format
37
+ format:
38
+ ruff check --fix
39
+ ruff format
40
+
41
+
42
+
43
+ ## Run tests
44
+ .PHONY: test
45
+ test:
46
+ python -m pytest tests
47
+
48
+
49
+ ## Set up Python interpreter environment
50
+ .PHONY: create_environment
51
+ create_environment:
52
+ @bash -c "if [ ! -z `which virtualenvwrapper.sh` ]; then source `which virtualenvwrapper.sh`; mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); else mkvirtualenv.bat $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER); fi"
53
+ @echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
54
+
55
+
56
+
57
+
58
+ #################################################################################
59
+ # PROJECT RULES #
60
+ #################################################################################
61
+
62
+
63
+ ## Make dataset
64
+ .PHONY: data
65
+ data: requirements
66
+ $(PYTHON_INTERPRETER) projet_05/dataset.py
67
+
68
+
69
+ #################################################################################
70
+ # Self Documenting Commands #
71
+ #################################################################################
72
+
73
+ .DEFAULT_GOAL := help
74
+
75
+ define PRINT_HELP_PYSCRIPT
76
+ import re, sys; \
77
+ lines = '\n'.join([line for line in sys.stdin]); \
78
+ matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
79
+ print('Available rules:\n'); \
80
+ print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
81
+ endef
82
+ export PRINT_HELP_PYSCRIPT
83
+
84
+ help:
85
+ @$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py CHANGED
@@ -1,7 +1,181 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
  import gradio as gr
8
+ import pandas as pd
9
+ from loguru import logger
10
+
11
+ from projet_05.branding import apply_brand_theme
12
+ from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
13
+
14
+ MODEL_PATH = Path("models/best_model.joblib")
15
+ METADATA_PATH = Path("models/best_model_meta.json")
16
+ SCHEMA_PATH = Path("data/processed/schema.json")
17
+
18
+
19
+ def _load_schema(path: Path) -> dict[str, Any]:
20
+ if not path.exists():
21
+ return {}
22
+ return json.loads(path.read_text(encoding="utf-8"))
23
+
24
+
25
+ def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
26
+ if schema:
27
+ candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
28
+ if candidates:
29
+ return candidates
30
+ features = metadata.get("features", {})
31
+ explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
32
+ if explicit:
33
+ return explicit
34
+ if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
35
+ return list(pipeline.feature_names_in_)
36
+ return []
37
+
38
+
39
+ def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
40
+ if isinstance(payload, pd.DataFrame):
41
+ df = payload.copy()
42
+ elif payload is None:
43
+ df = pd.DataFrame(columns=headers)
44
+ else:
45
+ df = pd.DataFrame(payload, columns=headers if headers else None)
46
+ df = df.dropna(how="all")
47
+ if df.empty:
48
+ raise gr.Error("Merci de saisir au moins une ligne complète.")
49
+ return df
50
+
51
+
52
+ def _ensure_model():
53
+ if PIPELINE is None:
54
+ raise gr.Error(
55
+ "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
56
+ )
57
+
58
+
59
+ def score_table(table):
60
+ _ensure_model()
61
+ df = _convert_input(table, FEATURE_ORDER)
62
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
63
+ return run_inference(
64
+ df,
65
+ PIPELINE,
66
+ THRESHOLD,
67
+ drop_columns=drop_cols,
68
+ required_features=FEATURE_ORDER or None,
69
+ )
70
+
71
+
72
+ def score_csv(upload):
73
+ _ensure_model()
74
+ if upload is None:
75
+ raise gr.Error("Veuillez déposer un fichier CSV.")
76
+ df = pd.read_csv(upload.name)
77
+ drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
78
+ return run_inference(
79
+ df,
80
+ PIPELINE,
81
+ THRESHOLD,
82
+ drop_columns=drop_cols,
83
+ required_features=FEATURE_ORDER or None,
84
+ )
85
+
86
+
87
+ def predict_from_form(*values):
88
+ _ensure_model()
89
+ if not FEATURE_ORDER:
90
+ raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
91
+ payload = {feature: value for feature, value in zip(FEATURE_ORDER, values)}
92
+ df = pd.DataFrame([payload])
93
+ scored = run_inference(
94
+ df,
95
+ PIPELINE,
96
+ THRESHOLD,
97
+ required_features=FEATURE_ORDER or None,
98
+ )
99
+ row = scored.iloc[0]
100
+ label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
101
+ return {
102
+ "probability": round(float(row["proba_depart"]), 4),
103
+ "decision": label,
104
+ "threshold": THRESHOLD,
105
+ }
106
+
107
+
108
+ # Chargement des artéfacts
109
+ apply_brand_theme()
110
+
111
+ PIPELINE = None
112
+ METADATA: dict[str, Any] = {}
113
+ THRESHOLD = 0.5
114
+ TARGET_COLUMN: str | None = None
115
+ SCHEMA = _load_schema(SCHEMA_PATH)
116
+
117
+ try:
118
+ PIPELINE = load_pipeline(MODEL_PATH)
119
+ METADATA = load_metadata(METADATA_PATH)
120
+ THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
121
+ TARGET_COLUMN = METADATA.get("target")
122
+ except FileNotFoundError as exc:
123
+ logger.warning("Artéfact manquant: {}", exc)
124
+
125
+ FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
126
+
127
+ with gr.Blocks(title="Prédicteur d'attrition") as demo:
128
+ gr.Markdown("# API Gradio – Prédiction de départ employé")
129
+ gr.Markdown(
130
+ "Le modèle applique le pipeline entraîné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
131
+ )
132
+
133
+ if PIPELINE is None:
134
+ gr.Markdown(
135
+ "⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
136
+ )
137
+ else:
138
+ gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
139
+
140
+ with gr.Tab("Formulaire unitaire"):
141
+ if not FEATURE_ORDER:
142
+ gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
143
+ else:
144
+ form_inputs: list[gr.components.Component] = [] # type: ignore
145
+ for feature in FEATURE_ORDER:
146
+ form_inputs.append(
147
+ gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
148
+ )
149
+ form_output = gr.JSON(label="Résultat")
150
+ gr.Button("Prédire").click(
151
+ fn=predict_from_form,
152
+ inputs=form_inputs,
153
+ outputs=form_output,
154
+ )
155
+
156
+ with gr.Tab("Tableau interactif"):
157
+ table_input = gr.Dataframe(
158
+ headers=FEATURE_ORDER if FEATURE_ORDER else None,
159
+ row_count=(1, "dynamic"),
160
+ col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
161
+ type="pandas",
162
+ )
163
+ table_output = gr.Dataframe(label="Prédictions", type="pandas")
164
+ gr.Button("Scorer les lignes").click(
165
+ fn=score_table,
166
+ inputs=table_input,
167
+ outputs=table_output,
168
+ )
169
+
170
+ with gr.Tab("Fichier CSV"):
171
+ file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
172
+ file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
173
+ gr.Button("Scorer le fichier").click(
174
+ fn=score_csv,
175
+ inputs=file_input,
176
+ outputs=file_output,
177
+ )
178
 
 
 
179
 
180
+ if __name__ == "__main__":
181
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml CHANGED
@@ -1,10 +1,13 @@
1
- name: Déployer vers Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
 
 
 
8
  jobs:
9
  deploy:
10
  runs-on: ubuntu-latest
@@ -23,7 +26,7 @@ jobs:
23
  python -m pip install --upgrade pip
24
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
25
 
26
- - name: Push to Hugging Face Space
27
  env:
28
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
29
  run: |
@@ -33,5 +36,5 @@ jobs:
33
  rsync -av --exclude '.git' ./ hf_space/
34
  cd hf_space
35
  git add .
36
- git commit -m "🚀 Auto-deploy from GitHub Actions"
37
- git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
 
1
+ name: Deploy to Hugging Face Spaces
2
 
3
  on:
4
  push:
5
  branches:
6
  - main
7
 
8
+ permissions:
9
+ contents: write
10
+
11
  jobs:
12
  deploy:
13
  runs-on: ubuntu-latest
 
26
  python -m pip install --upgrade pip
27
  if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28
 
29
+ - name: Deploy to Hugging Face Space
30
  env:
31
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
32
  run: |
 
36
  rsync -av --exclude '.git' ./ hf_space/
37
  cd hf_space
38
  git add .
39
+ git commit -m "🚀 Auto-deploy from GitHub Actions" || echo "No changes to commit"
40
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore CHANGED
@@ -1,2 +1,192 @@
 
 
 
 
 
1
  *.code-workspace
2
- .venv/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data
2
+ /data/
3
+
4
+ # Mac OS-specific storage files
5
+ .DS_Store
6
  *.code-workspace
7
+
8
+ # vim
9
+ *.swp
10
+ *.swo
11
+
12
+ ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
13
+
14
+ # Byte-compiled / optimized / DLL files
15
+ __pycache__/
16
+ *.py[cod]
17
+ *$py.class
18
+
19
+ # C extensions
20
+ *.so
21
+
22
+ # Distribution / packaging
23
+ .Python
24
+ build/
25
+ develop-eggs/
26
+ dist/
27
+ downloads/
28
+ eggs/
29
+ .eggs/
30
+ lib/
31
+ lib64/
32
+ parts/
33
+ sdist/
34
+ var/
35
+ wheels/
36
+ share/python-wheels/
37
+ *.egg-info/
38
+ .installed.cfg
39
+ *.egg
40
+ MANIFEST
41
+
42
+ # PyInstaller
43
+ # Usually these files are written by a python script from a template
44
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
45
+ *.manifest
46
+ *.spec
47
+
48
+ # Installer logs
49
+ pip-log.txt
50
+ pip-delete-this-directory.txt
51
+
52
+ # Unit test / coverage reports
53
+ htmlcov/
54
+ .tox/
55
+ .nox/
56
+ .coverage
57
+ .coverage.*
58
+ .cache
59
+ nosetests.xml
60
+ coverage.xml
61
+ *.cover
62
+ *.py,cover
63
+ .hypothesis/
64
+ .pytest_cache/
65
+ cover/
66
+
67
+ # Translations
68
+ *.mo
69
+ *.pot
70
+
71
+ # Django stuff:
72
+ *.log
73
+ local_settings.py
74
+ db.sqlite3
75
+ db.sqlite3-journal
76
+
77
+ # Flask stuff:
78
+ instance/
79
+ .webassets-cache
80
+
81
+ # Scrapy stuff:
82
+ .scrapy
83
+
84
+ # MkDocs documentation
85
+ docs/site/
86
+
87
+ # PyBuilder
88
+ .pybuilder/
89
+ target/
90
+
91
+ # Jupyter Notebook
92
+ .ipynb_checkpoints
93
+
94
+ # IPython
95
+ profile_default/
96
+ ipython_config.py
97
+
98
+ # pyenv
99
+ # For a library or package, you might want to ignore these files since the code is
100
+ # intended to run in multiple environments; otherwise, check them in:
101
+ # .python-version
102
+
103
+ # pipenv
104
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
105
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
106
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
107
+ # install all needed dependencies.
108
+ #Pipfile.lock
109
+
110
+ # UV
111
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
112
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
113
+ # commonly ignored for libraries.
114
+ #uv.lock
115
+
116
+ # poetry
117
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
118
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
119
+ # commonly ignored for libraries.
120
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
121
+ #poetry.lock
122
+
123
+ # pdm
124
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
125
+ #pdm.lock
126
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
127
+ # in version control.
128
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
129
+ .pdm.toml
130
+ .pdm-python
131
+ .pdm-build/
132
+
133
+ # pixi
134
+ # pixi.lock should be committed to version control for reproducibility
135
+ # .pixi/ contains the environments and should not be committed
136
+ .pixi/
137
+
138
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
139
+ __pypackages__/
140
+
141
+ # Celery stuff
142
+ celerybeat-schedule
143
+ celerybeat.pid
144
+
145
+ # SageMath parsed files
146
+ *.sage.py
147
+
148
+ # Environments
149
+ .env
150
+ .venv
151
+ env/
152
+ venv/
153
+ ENV/
154
+ env.bak/
155
+ venv.bak/
156
+
157
+ # Spyder project settings
158
+ .spyderproject
159
+ .spyproject
160
+
161
+ # Rope project settings
162
+ .ropeproject
163
+
164
+ # mkdocs documentation
165
+ /site
166
+
167
+ # mypy
168
+ .mypy_cache/
169
+ .dmypy.json
170
+ dmypy.json
171
+
172
+ # Pyre type checker
173
+ .pyre/
174
+
175
+ # pytype static type analyzer
176
+ .pytype/
177
+
178
+ # Cython debug symbols
179
+ cython_debug/
180
+
181
+ # PyCharm
182
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
183
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
184
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
185
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
186
+ #.idea/
187
+
188
+ # Ruff stuff:
189
+ .ruff_cache/
190
+
191
+ # PyPI configuration file
192
+ .pypirc
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md CHANGED
@@ -1,3 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Projet 05
3
  emoji: 👀
@@ -10,3 +71,270 @@ pinned: false
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # projet_05
2
+
3
+ <a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
4
+ <img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
5
+ </a>
6
+
7
+ Déployez un modèle de Machine Learning
8
+
9
+ ## Organisation du projet
10
+
11
+ ```
12
+ ├── LICENSE <- Open-source license if one is chosen
13
+ ├── Makefile <- Makefile with convenience commands like `make data` or `make train`
14
+ ├── README.md <- The top-level README for developers using this project.
15
+ ├── data
16
+ │ ├── external <- Data from third party sources.
17
+ │ ├── interim <- Intermediate data that has been transformed.
18
+ │ ├── processed <- The final, canonical data sets for modeling.
19
+ │ └── raw <- The original, immutable data dump.
20
+
21
+ ├── docs <- A default mkdocs project; see www.mkdocs.org for details
22
+
23
+ ├── models <- Trained and serialized models, model predictions, or model summaries
24
+
25
+ ├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
26
+ │ the creator's initials, and a short `-` delimited description, e.g.
27
+ │ `1.0-jqp-initial-data-exploration`.
28
+
29
+ ├── pyproject.toml <- Project configuration file with package metadata for
30
+ │ projet_05 and configuration for tools like black
31
+
32
+ ├── references <- Data dictionaries, manuals, and all other explanatory materials.
33
+
34
+ ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
35
+ │ └── figures <- Generated graphics and figures to be used in reporting
36
+
37
+ ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
38
+ │ generated with `pip freeze > requirements.txt`
39
+
40
+ ├── setup.cfg <- Configuration file for flake8
41
+
42
+ └── projet_05 <- Source code for use in this project.
43
+
44
+ ├── __init__.py <- Makes projet_05 a Python module
45
+
46
+ ├── config.py <- Store useful variables and configuration
47
+
48
+ ├── dataset.py <- Scripts to download or generate data
49
+
50
+ ├── features.py <- Code to create features for modeling
51
+
52
+ ├── modeling
53
+ │ ├── __init__.py
54
+ │ ├── predict.py <- Code to run model inference with trained models
55
+ │ └── train.py <- Code to train models
56
+
57
+ └── plots.py <- Code to create visualizations
58
+ ```
59
+
60
+ --------
61
+
62
  ---
63
  title: Projet 05
64
  emoji: 👀
 
71
  ---
72
 
73
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
74
+
75
+ <!-- Improved compatibility of back to top link: See: https://github.com/othneildrew/Best-README-Template/pull/73 -->
76
+ <a id="readme-top"></a>
77
+ <!--
78
+ *** Thanks for checking out the Best-README-Template. If you have a suggestion
79
+ *** that would make this better, please fork the repo and create a pull request
80
+ *** or simply open an issue with the tag "enhancement".
81
+ *** Don't forget to give the project a star!
82
+ *** Thanks again! Now go create something AMAZING! :D
83
+ -->
84
+
85
+
86
+
87
+ <!-- PROJECT SHIELDS -->
88
+ <!--
89
+ *** I'm using markdown "reference style" links for readability.
90
+ *** Reference links are enclosed in brackets [ ] instead of parentheses ( ).
91
+ *** See the bottom of this document for the declaration of the reference variables
92
+ *** for contributors-url, forks-url, etc. This is an optional, concise syntax you may use.
93
+ *** https://www.markdownguide.org/basic-syntax/#reference-style-links
94
+ -->
95
+ [![Contributors][contributors-shield]][contributors-url]
96
+ [![Forks][forks-shield]][forks-url]
97
+ [![Stargazers][stars-shield]][stars-url]
98
+ [![Issues][issues-shield]][issues-url]
99
+ [![project_license][license-shield]][license-url]
100
+ [![LinkedIn][linkedin-shield]][linkedin-url]
101
+ ![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/:user/:repo/:workflow)
102
+
103
+
104
+
105
+ <!-- PROJECT LOGO -->
106
+ <br />
107
+ <div align="center">
108
+ <a href="https://github.com/github_username/repo_name">
109
+ <img src="images/logo.png" alt="Logo" width="80" height="80">
110
+ </a>
111
+
112
+ <h3 align="center">project_title</h3>
113
+
114
+ <p align="center">
115
+ project_description
116
+ <br />
117
+ <a href="https://github.com/github_username/repo_name"><strong>Explore the docs »</strong></a>
118
+ <br />
119
+ <br />
120
+ <a href="https://github.com/github_username/repo_name">View Demo</a>
121
+ &middot;
122
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=bug&template=bug-report---.md">Report Bug</a>
123
+ &middot;
124
+ <a href="https://github.com/github_username/repo_name/issues/new?labels=enhancement&template=feature-request---.md">Request Feature</a>
125
+ </p>
126
+ </div>
127
+
128
+
129
+
130
+ <!-- TABLE OF CONTENTS -->
131
+ <details>
132
+ <summary>Table of Contents</summary>
133
+ <ol>
134
+ <li>
135
+ <a href="#about-the-project">About The Project</a>
136
+ <ul>
137
+ <li><a href="#built-with">Built With</a></li>
138
+ </ul>
139
+ </li>
140
+ <li>
141
+ <a href="#getting-started">Getting Started</a>
142
+ <ul>
143
+ <li><a href="#prerequisites">Prerequisites</a></li>
144
+ <li><a href="#installation">Installation</a></li>
145
+ </ul>
146
+ </li>
147
+ <li><a href="#usage">Usage</a></li>
148
+ <li><a href="#roadmap">Roadmap</a></li>
149
+ <li><a href="#contributing">Contributing</a></li>
150
+ <li><a href="#license">License</a></li>
151
+ <li><a href="#contact">Contact</a></li>
152
+ <li><a href="#acknowledgments">Acknowledgments</a></li>
153
+ </ol>
154
+ </details>
155
+
156
+
157
+
158
+ <!-- ABOUT THE PROJECT -->
159
+ ## About The Project
160
+
161
+ [![Product Name Screen Shot][product-screenshot]](https://example.com)
162
+
163
+ Here's a blank template to get started. To avoid retyping too much info, do a search and replace with your text editor for the following: `github_username`, `repo_name`, `twitter_handle`, `linkedin_username`, `email_client`, `email`, `project_title`, `project_description`, `project_license`
164
+
165
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
166
+
167
+
168
+
169
+ ### Built With
170
+
171
+ * [![Python][Python]][Python-url]
172
+ * [![SQL][SQL]][SQL-url]
173
+
174
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
175
+
176
+
177
+
178
+ <!-- GETTING STARTED -->
179
+ ## Getting Started
180
+
181
+ This is an example of how you may give instructions on setting up your project locally.
182
+ To get a local copy up and running follow these simple example steps.
183
+
184
+ ### Prerequisites
185
+
186
+ This is an example of how to list things you need to use the software and how to install them.
187
+ * npm
188
+ ```sh
189
+ npm install npm@latest -g
190
+ ```
191
+
192
+ ### Installation
193
+
194
+ pip install -r requirements.txt
195
+ uvicorn app.main:app --reload
196
+
197
+ 1. Get a free API Key at [https://example.com](https://example.com)
198
+ 2. Clone the repo
199
+ ```sh
200
+ git clone https://github.com/github_username/repo_name.git
201
+ ```
202
+ 3. Install NPM packages
203
+ ```sh
204
+ npm install
205
+ ```
206
+ 4. Enter your API in `config.js`
207
+ ```js
208
+ const API_KEY = 'ENTER YOUR API';
209
+ ```
210
+ 5. Change git remote url to avoid accidental pushes to base project
211
+ ```sh
212
+ git remote set-url origin github_username/repo_name
213
+ git remote -v # confirm the changes
214
+ ```
215
+
216
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
217
+
218
+
219
+
220
+ <!-- USAGE EXAMPLES -->
221
+ ## Usage
222
+
223
+ Use this space to show useful examples of how a project can be used. Additional screenshots, code examples and demos work well in this space. You may also link to more resources.
224
+
225
+ _For more examples, please refer to the [Documentation](https://example.com)_
226
+
227
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
228
+
229
+
230
+
231
+ <!-- ROADMAP -->
232
+ ## Roadmap
233
+
234
+ - [ ] Feature 1
235
+ - [ ] Feature 2
236
+ - [ ] Feature 3
237
+ - [ ] Nested Feature
238
+
239
+ See the [open issues](https://github.com/github_username/repo_name/issues) for a full list of proposed features (and known issues).
240
+
241
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
242
+
243
+
244
+
245
+ <!-- CONTRIBUTING -->
246
+ ## Contributing
247
+
248
+ Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**.
249
+
250
+ If you have a suggestion that would make this better, please fork the repo and create a pull request. You can also simply open an issue with the tag "enhancement".
251
+ Don't forget to give the project a star! Thanks again!
252
+
253
+ 1. Fork the Project
254
+ 2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
255
+ 3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
256
+ 4. Push to the Branch (`git push origin feature/AmazingFeature`)
257
+ 5. Open a Pull Request
258
+
259
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
260
+
261
+ ### Top contributors:
262
+
263
+ <a href="https://github.com/github_username/repo_name/graphs/contributors">
264
+ <img src="https://contrib.rocks/image?repo=github_username/repo_name" alt="contrib.rocks image" />
265
+ </a>
266
+
267
+
268
+
269
+ <!-- LICENSE -->
270
+ ## License
271
+
272
+ Distributed under the project_license. See `LICENSE.txt` for more information.
273
+
274
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
275
+
276
+
277
+
278
+ <!-- CONTACT -->
279
+ ## Contact
280
+
281
+ Your Name - [@twitter_handle](https://twitter.com/twitter_handle) - email@email_client.com
282
+
283
+ Project Link: [https://github.com/github_username/repo_name](https://github.com/github_username/repo_name)
284
+
285
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
286
+
287
+
288
+
289
+ <!-- ACKNOWLEDGMENTS -->
290
+ ## Acknowledgments
291
+
292
+ * []()
293
+ * []()
294
+ * []()
295
+
296
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
297
+
298
+
299
+
300
+ <!-- MARKDOWN LINKS & IMAGES -->
301
+ <!-- https://www.markdownguide.org/basic-syntax/#reference-style-links -->
302
+ [contributors-shield]: https://img.shields.io/github/contributors/github_username/repo_name.svg?style=for-the-badge
303
+ [contributors-url]: https://github.com/github_username/repo_name/graphs/contributors
304
+ [forks-shield]: https://img.shields.io/github/forks/github_username/repo_name.svg?style=for-the-badge
305
+ [forks-url]: https://github.com/github_username/repo_name/network/members
306
+ [stars-shield]: https://img.shields.io/github/stars/github_username/repo_name.svg?style=for-the-badge
307
+ [stars-url]: https://github.com/github_username/repo_name/stargazers
308
+ [issues-shield]: https://img.shields.io/github/issues/github_username/repo_name.svg?style=for-the-badge
309
+ [issues-url]: https://github.com/github_username/repo_name/issues
310
+ [license-shield]: https://img.shields.io/github/license/github_username/repo_name.svg?style=for-the-badge
311
+ [license-url]: https://github.com/github_username/repo_name/blob/master/LICENSE.txt
312
+ [linkedin-shield]: https://img.shields.io/badge/-LinkedIn-black.svg?style=for-the-badge&logo=linkedin&colorB=555
313
+ [linkedin-url]: https://linkedin.com/in/linkedin_username
314
+ [product-screenshot]: images/screenshot.png
315
+ [Noobie]: https://img.shields.io/badge/Data%20Science%20for%20Beginners-84CC16?style=for-the-badge&labelColor=E5E7EB&color=84CC16
316
+ <!-- Shields.io badges. You can a comprehensive list with many more badges at: https://github.com/inttter/md-badges -->
317
+ [Next.js]: https://img.shields.io/badge/next.js-000000?style=for-the-badge&logo=nextdotjs&logoColor=white
318
+ [Next-url]: https://nextjs.org/
319
+ [React.js]: https://img.shields.io/badge/React-20232A?style=for-the-badge&logo=react&logoColor=61DAFB
320
+ [React-url]: https://reactjs.org/
321
+ [Vue.js]: https://img.shields.io/badge/Vue.js-35495E?style=for-the-badge&logo=vuedotjs&logoColor=4FC08D
322
+ [Vue-url]: https://vuejs.org/
323
+ [Angular.io]: https://img.shields.io/badge/Angular-DD0031?style=for-the-badge&logo=angular&logoColor=white
324
+ [Angular-url]: https://angular.io/
325
+ [Svelte.dev]: https://img.shields.io/badge/Svelte-4A4A55?style=for-the-badge&logo=svelte&logoColor=FF3E00
326
+ [Svelte-url]: https://svelte.dev/
327
+ [Laravel.com]: https://img.shields.io/badge/Laravel-FF2D20?style=for-the-badge&logo=laravel&logoColor=white
328
+ [Laravel-url]: https://laravel.com
329
+ [Bootstrap.com]: https://img.shields.io/badge/Bootstrap-563D7C?style=for-the-badge&logo=bootstrap&logoColor=white
330
+ [Bootstrap-url]: https://getbootstrap.com
331
+ [JQuery.com]: https://img.shields.io/badge/jQuery-0769AD?style=for-the-badge&logo=jquery&logoColor=white
332
+ [JQuery-url]: https://jquery.com
333
+ <!-- TODO: -->
334
+ [![Postgres](https://img.shields.io/badge/Postgres-%23316192.svg?logo=postgresql&logoColor=white)](#)
335
+ [![Python](https://img.shields.io/badge/Python-3776AB?logo=python&logoColor=fff)](#)
336
+ [![Sphinx](https://img.shields.io/badge/Sphinx-000?logo=sphinx&logoColor=fff)](#)
337
+ [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#)
338
+ [![NumPy](https://img.shields.io/badge/NumPy-4DABCF?logo=numpy&logoColor=fff)](#)
339
+ [![Pandas](https://img.shields.io/badge/Pandas-150458?logo=pandas&logoColor=fff)](#)
340
+ [![Slack](https://img.shields.io/badge/Slack-4A154B?logo=slack&logoColor=fff)](#)[text](../projet_04/.gitignore)
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app/main.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ def greet(name):
4
+ return "Hello " + name + "!!"
5
+
6
+ demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
+ demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.github/workflows/deploy.yml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Déployer vers Hugging Face Spaces
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout repository
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Setup Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.10"
20
+
21
+ - name: Install dependencies
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
25
+
26
+ - name: Push to Hugging Face Space
27
+ env:
28
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
29
+ run: |
30
+ git config --global user.email "actions@github.com"
31
+ git config --global user.name "GitHub Actions"
32
+ git clone https://huggingface.co/spaces/stephmnt/projet_05 hf_space
33
+ rsync -av --exclude '.git' ./ hf_space/
34
+ cd hf_space
35
+ git add .
36
+ git commit -m "🚀 Auto-deploy from GitHub Actions"
37
+ git push https://stephmnt:$HF_TOKEN@huggingface.co/spaces/stephmnt/projet_05 main
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.code-workspace
2
+ .venv/
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Projet 05
3
+ emoji: 👀
4
+ colorFrom: indigo
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
import gradio as gr


def greet(name):
    """Return a friendly double-exclamation greeting for *name*."""
    parts = ["Hello ", name, "!!"]
    return parts[0] + parts[1] + parts[2]


# Minimal text-in / text-out Gradio demo; launch starts the local server.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from app.main import greet


def test_greet_returns_string():
    """Check that greet returns a string."""
    result = greet("Alice")
    assert isinstance(result, str), "Le résultat doit être une chaîne de caractères."


def test_greet_output_content():
    """Check that greet produces exactly the expected sentence."""
    result = greet("Bob")
    assert result == "Hello Bob!!", f"Résultat inattendu : {result}"


def test_greet_with_empty_string():
    """Check behaviour when the input is empty."""
    result = greet("")
    assert result == "Hello !!", "Le résultat doit gérer les entrées vides."
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/poetry.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [virtualenvs]
2
+ in-project = true
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from projet_05 import config # noqa: F401
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/config.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from dotenv import load_dotenv
from loguru import logger

# Load environment variables from .env file if it exists
load_dotenv()

# Paths
# Project root = the directory one level above this package (projet_05/).
PROJ_ROOT = Path(__file__).resolve().parents[1]
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")

# Canonical data-science layout: raw -> interim -> processed (+ external).
DATA_DIR = PROJ_ROOT / "data"
RAW_DATA_DIR = DATA_DIR / "raw"
INTERIM_DATA_DIR = DATA_DIR / "interim"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
EXTERNAL_DATA_DIR = DATA_DIR / "external"

MODELS_DIR = PROJ_ROOT / "models"

REPORTS_DIR = PROJ_ROOT / "reports"
FIGURES_DIR = REPORTS_DIR / "figures"

# If tqdm is installed, configure loguru with tqdm.write so progress bars
# and log lines do not interleave on the console.
# https://github.com/Delgan/loguru/issues/135
try:
    from tqdm import tqdm

    # Replace loguru's default stderr handler (id 0) with a tqdm-aware one.
    logger.remove(0)
    logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
except ModuleNotFoundError:
    pass
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = RAW_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    # ----------------------------------------------
):
    """Scaffold processing step: only logs simulated progress for now."""
    logger.info("Processing dataset...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Processing dataset complete.")


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

from loguru import logger
from tqdm import tqdm
import typer

from projet_05.config import PROCESSED_DATA_DIR

app = typer.Typer()


@app.command()
def main(
    # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
    input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
    output_path: Path = PROCESSED_DATA_DIR / "features.csv",
    # -----------------------------------------
):
    """Scaffold feature-generation step: only logs simulated progress for now."""
    logger.info("Generating features from dataset...")
    for step in tqdm(range(10), total=10):
        if step == 5:
            logger.info("Something happened for iteration 5.")
    logger.success("Features generation complete.")


if __name__ == "__main__":
    app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/__init__.py ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
16
+ model_path: Path = MODELS_DIR / "model.pkl",
17
+ predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
18
+ # -----------------------------------------
19
+ ):
20
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
+ logger.info("Performing inference for model...")
22
+ for i in tqdm(range(10), total=10):
23
+ if i == 5:
24
+ logger.info("Something happened for iteration 5.")
25
+ logger.success("Inference complete.")
26
+ # -----------------------------------------
27
+
28
+
29
+ if __name__ == "__main__":
30
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ features_path: Path = PROCESSED_DATA_DIR / "features.csv",
16
+ labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
17
+ model_path: Path = MODELS_DIR / "model.pkl",
18
+ # -----------------------------------------
19
+ ):
20
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
+ logger.info("Training some model...")
22
+ for i in tqdm(range(10), total=10):
23
+ if i == 5:
24
+ logger.info("Something happened for iteration 5.")
25
+ logger.success("Modeling training complete.")
26
+ # -----------------------------------------
27
+
28
+
29
+ if __name__ == "__main__":
30
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/plots.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from tqdm import tqdm
5
+ import typer
6
+
7
+ from projet_05.config import FIGURES_DIR, PROCESSED_DATA_DIR
8
+
9
+ app = typer.Typer()
10
+
11
+
12
+ @app.command()
13
+ def main(
14
+ # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
+ input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
16
+ output_path: Path = FIGURES_DIR / "plot.png",
17
+ # -----------------------------------------
18
+ ):
19
+ # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
+ logger.info("Generating plot from data...")
21
+ for i in tqdm(range(10), total=10):
22
+ if i == 5:
23
+ logger.info("Something happened for iteration 5.")
24
+ logger.success("Plot generation complete.")
25
+ # -----------------------------------------
26
+
27
+
28
+ if __name__ == "__main__":
29
+ app()
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/pyproject.toml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["flit_core >=3.2,<4"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "projet_05"
7
+ version = "0.0.1"
8
+ description = "D\u00e9ployez un mod\u00e8le de Machine Learning"
9
+ authors = [
10
+ { name = "St\u00e9phane Manet" },
11
+ ]
12
+ license = { file = "LICENSE" }
13
+ readme = "README.md"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License"
17
+ ]
18
+ dependencies = [
19
+ "loguru",
20
+ "mkdocs",
21
+ "pip",
22
+ "pytest",
23
+ "python-dotenv",
24
+ "ruff",
25
+ "tqdm",
26
+ "typer",
27
+ "imbalanced-learn (>=0.14.0,<0.15.0)",
28
+ "scikit-learn (>=1.4.2,<2.0.0)",
29
+ "matplotlib (>=3.10.7,<4.0.0)",
30
+ "numpy (>=2.3.4,<3.0.0)",
31
+ "pandas (>=2.3.3,<3.0.0)",
32
+ "pyyaml (>=6.0.3,<7.0.0)",
33
+ "scipy (>=1.16.3,<2.0.0)",
34
+ "seaborn (>=0.13.2,<0.14.0)",
35
+ "shap (>=0.49.1,<0.50.0)",
36
+ "gradio (>=5.49.1,<6.0.0)",
37
+ "joblib (>=1.4.2,<2.0.0)"
38
+ ]
39
+
40
+ requires-python = ">=3.11,<3.13"
41
+
42
+
43
+ [tool.ruff]
44
+ line-length = 99
45
+ src = ["projet_05"]
46
+ include = ["pyproject.toml", "projet_05/**/*.py"]
47
+
48
+ [tool.ruff.lint]
49
+ extend-select = ["I"] # Add import sorting
50
+
51
+ [tool.ruff.lint.isort]
52
+ known-first-party = ["projet_05"]
53
+ force-sort-within-sections = true
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/references/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/reports/figures/.gitkeep ADDED
File without changes
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/tests/test_data.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
import pytest


# Deliberately failing placeholder: keeps CI red as a reminder that real
# tests for this package are still missing. Replace with genuine assertions.
def test_code_is_tested():
    assert False
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/notebooks/Manet_stephane_notebook_112025.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/__init__.py CHANGED
@@ -1 +1,4 @@
1
  from projet_05 import config # noqa: F401
 
 
 
 
1
  from projet_05 import config # noqa: F401
2
+ from projet_05.settings import Settings, load_settings # noqa: F401
3
+
4
+ __all__ = ["config", "Settings", "load_settings"]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/branding.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+ from typing import Union
6
+
7
+ from scripts_projet04.brand.brand import ( # type: ignore[import-not-found]
8
+ Theme,
9
+ ThemeConfig,
10
+ configure_brand,
11
+ load_brand,
12
+ make_diverging_cmap,
13
+ )
14
+
15
+ ROOT_DIR = Path(__file__).resolve().parents[1]
16
+ DEFAULT_BRAND_PATH = ROOT_DIR / "scripts_projet04" / "brand" / "brand.yml"
17
+
18
+
19
def _resolve_path(path: Union[str, Path, None]) -> Path:
    """Normalize *path* to an absolute Path; fall back to the default brand file."""
    if path is not None:
        return Path(path).expanduser().resolve()
    return DEFAULT_BRAND_PATH
23
+
24
+
25
@lru_cache(maxsize=1)
def load_brand_config(path: Union[str, Path, None] = None) -> ThemeConfig:
    """Parse the brand YAML a single time and hand back the ThemeConfig."""
    return load_brand(_resolve_path(path))
30
+
31
+
32
@lru_cache(maxsize=1)
def apply_brand_theme(path: Union[str, Path, None] = None) -> ThemeConfig:
    """
    Apply the OpenClassrooms/TechNova brand theme globally.

    Returns the ThemeConfig so callers can inspect colors if needed.
    """
    config = configure_brand(_resolve_path(path))
    Theme.apply()
    return config
43
+
44
+
45
+ __all__ = [
46
+ "Theme",
47
+ "ThemeConfig",
48
+ "apply_brand_theme",
49
+ "load_brand_config",
50
+ "make_diverging_cmap",
51
+ "DEFAULT_BRAND_PATH",
52
+ ]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/dataset.py CHANGED
@@ -1,28 +1,202 @@
 
 
 
1
  from pathlib import Path
2
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
- from projet_05.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
10
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- input_path: Path = RAW_DATA_DIR / "dataset.csv",
16
- output_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
17
- # ----------------------------------------------
 
 
 
 
 
 
 
 
18
  ):
19
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
- logger.info("Processing dataset...")
21
- for i in tqdm(range(10), total=10):
22
- if i == 5:
23
- logger.info("Something happened for iteration 5.")
24
- logger.success("Processing dataset complete.")
25
- # -----------------------------------------
26
 
27
 
28
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
+ import pandas as pd
8
  from loguru import logger
 
9
  import typer
10
 
11
+ from projet_05.config import INTERIM_DATA_DIR
12
+ from projet_05.settings import Settings, load_settings
13
+
14
+ app = typer.Typer(help="Préparation et fusion des données sources.")
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Utilitaires
19
+ # ---------------------------------------------------------------------------
20
def safe_read_csv(path: Path, *, dtype=None) -> pd.DataFrame:
    """Read a CSV file and return an empty frame when it fails."""
    try:
        logger.info("Lecture du fichier {}", path)
        frame = pd.read_csv(path, dtype=dtype)
    except FileNotFoundError:
        logger.warning("Fichier absent: {}", path)
        return pd.DataFrame()
    except Exception as exc:  # pragma: no cover - log + empty dataframe
        logger.error("Impossible de lire {} ({})", path, exc)
        return pd.DataFrame()
    return frame
31
+
32
+
33
def clean_text_values(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize textual values that often materialize missing values."""
    # Tokens that mean "no data" in the raw exports: blank/whitespace cells,
    # NaN spellings, and French "don't know" / "not relevant" answers.
    replace_tokens = [
        "",
        " ",
        "  ",
        "   ",
        "nan",
        "NaN",
        "NAN",
        "None",
        "JE ne sais pas",
        "je ne sais pas",
        "Je ne sais pas",
        "Unknow",
        "Unknown",
        "non pertinent",
        "Non pertinent",
        "NON PERTINENT",
    ]
    cleaned = df.replace(replace_tokens, np.nan)

    # Object columns additionally get the pandas "string" dtype and trimming;
    # tokens are replaced again so strip() never resurrects a placeholder.
    for column in cleaned.select_dtypes(include="object"):
        cleaned[column] = (
            cleaned[column].replace(replace_tokens, np.nan).astype("string").str.strip()
        )
    return cleaned
61
+
62
+
63
def _harmonize_id_column(df: pd.DataFrame, column: str, *, digits_only: bool = True) -> pd.DataFrame:
    """
    Return a copy of *df* whose *column* holds nullable integers (Int64).

    When *digits_only* is true, the first run of digits is extracted from the
    raw value (e.g. "emp_123" -> 123); unparseable values become <NA>.
    """
    data = df.copy()
    if column not in data.columns:
        # Nothing to harmonize; hand back the untouched copy.
        return data

    if digits_only:
        # BUG FIX: the extraction pattern must be r"(\d+)". A double-escaped
        # r"(\\d+)" matches a literal backslash followed by "d" and therefore
        # never extracts digits, turning every id into <NA>.
        extracted = data[column].astype(str).str.extract(r"(\d+)")
        data[column] = pd.to_numeric(extracted[0], errors="coerce")
    data[column] = pd.to_numeric(data[column], errors="coerce").astype("Int64")
    return data
73
+
74
+
75
def _rename_column(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame:
    """Rename *source* to *target* when present; otherwise return df unchanged."""
    return df.rename(columns={source: target}) if source in df.columns else df
79
+
80
+
81
def _log_id_diagnostics(df: pd.DataFrame, *, name: str, col_id: str) -> None:
    """Log row / unique-id / duplicate counts for one source frame."""
    if col_id not in df.columns:
        logger.warning("La colonne {} est absente du fichier {}.", col_id, name)
        return
    row_count = len(df)
    unique_count = df[col_id].nunique(dropna=True)
    logger.info(
        "{name}: {total} lignes | {uniques} identifiants uniques | {duplicates} doublons",
        name=name,
        total=row_count,
        uniques=unique_count,
        duplicates=row_count - unique_count,
    )
95
+
96
+
97
def _persist_sql_trace(df_dict: dict[str, pd.DataFrame], settings: Settings) -> pd.DataFrame:
    """
    Reproduce the SQL merge described in the notebook.

    Each DataFrame is stored in an ephemeral SQLite database so that the
    executed query is kept as an auditable trace.
    """
    db_path = settings.db_file
    sql_path = settings.sql_file

    # Make sure both output locations exist before touching the files.
    db_path.parent.mkdir(parents=True, exist_ok=True)
    sql_path.parent.mkdir(parents=True, exist_ok=True)

    # Start from a fresh database on every run.
    if db_path.exists():
        db_path.unlink()

    # NOTE(review): the join column comes from local settings, not user input,
    # so the f-string interpolation is acceptable here.
    query = f"""
    SELECT *
    FROM sirh
    INNER JOIN evaluation USING ({settings.col_id})
    INNER JOIN sond USING ({settings.col_id});
    """.strip()

    with db_path.open("wb") as _:
        pass  # just ensure the file exists for sqlite on some platforms

    with sqlite3.connect(db_path) as conn:
        # Load each source frame as its own table, then run the 3-way join.
        for name, frame in df_dict.items():
            frame.to_sql(name, conn, index=False, if_exists="replace")
        merged = pd.read_sql_query(query, conn)

    # Persist the exact query next to the database for traceability.
    sql_path.write_text(query, encoding="utf-8")
    return merged
130
+
131
+
132
def build_dataset(settings: Settings) -> pd.DataFrame:
    """Load, clean, harmonize and merge the three raw sources."""
    # Every source follows the same pipeline: read -> id harmonization ->
    # textual cleanup. `sirh` already carries the id column; the two others
    # first rename their own id field onto settings.col_id.
    sirh = clean_text_values(
        safe_read_csv(settings.path_sirh).pipe(
            _harmonize_id_column, settings.col_id, digits_only=True
        )
    )
    evaluation = clean_text_values(
        safe_read_csv(settings.path_eval)
        .pipe(_rename_column, "eval_number", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )
    sond = clean_text_values(
        safe_read_csv(settings.path_sondage)
        .pipe(_rename_column, "code_sondage", settings.col_id)
        .pipe(_harmonize_id_column, settings.col_id, digits_only=True)
    )

    # Log per-source row/duplicate statistics before joining.
    for name, frame in {"sirh": sirh, "evaluation": evaluation, "sond": sond}.items():
        _log_id_diagnostics(frame, name=name, col_id=settings.col_id)

    frames = {
        "sirh": sirh,
        "evaluation": evaluation,
        "sond": sond,
    }
    # Three-way INNER JOIN executed through SQLite to keep an audit trace.
    merged = _persist_sql_trace(frames, settings)

    # Guard: the join key must survive the merge, otherwise the sources
    # are inconsistent and downstream steps would fail obscurely.
    missing_cols = [settings.col_id] if settings.col_id not in merged.columns else []
    if missing_cols:
        raise KeyError(
            f"La colonne {settings.col_id} est absente de la fusion finale. "
            "Vérifiez vos fichiers sources."
        )

    logger.success("Fusion réalisée: {} lignes / {} colonnes", *merged.shape)
    return merged
169
+
170
+
171
def save_dataset(df: pd.DataFrame, output_path: Path) -> None:
    """Write the merged frame as CSV, creating parent directories first."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Fichier fusionné sauvegardé dans {}", output_path)
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # CLI
179
+ # ---------------------------------------------------------------------------
180
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin vers un fichier settings.yml personnalisé.",
    ),
    output_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--output",
        "-o",
        help="Chemin de sortie du dataset fusionné.",
    ),
):
    """Typer entrypoint reproducing the raw-data merge (load, clean, join, save)."""

    # Fall back to the default settings.yml when no override is supplied.
    settings = load_settings(settings_path) if settings_path else load_settings()
    df = build_dataset(settings)
    save_dataset(df, output_path)
 
 
200
 
201
 
202
  if __name__ == "__main__":
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/explainability.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Tuple
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from loguru import logger
9
+
10
+ from projet_05.branding import Theme, apply_brand_theme, make_diverging_cmap
11
+ from scripts_projet04.manet_projet04.shap_generator import ( # type: ignore[import-not-found]
12
+ shap_global,
13
+ shap_local,
14
+ )
15
+
16
+ apply_brand_theme()
17
+
18
+
19
def _shape_array(values) -> np.ndarray:
    """Coerce SHAP output (Explanation-like or array-like) to a NaN-free ndarray."""
    raw = values.values if hasattr(values, "values") else values
    arr = np.array(raw)  # np.array always copies, so the caller's data is safe
    return np.nan_to_num(arr, copy=False)
25
+
26
+
27
def compute_shap_summary(
    pipeline,
    X: pd.DataFrame,
    y: pd.Series,
    *,
    max_samples: int = 500,
) -> Tuple[pd.DataFrame | None, object | None]:
    """
    Reuse the historical `shap_global` helper to build the plots and a tabular summary.

    Returns
    -------
    summary_df : pd.DataFrame | None
        Mean absolute SHAP value per feature, in descending order.
    shap_values : shap.Explanation | None
        Object returned by shap_global, kept for later local analyses.
    """
    # Brand-colored diverging colormap used by the generated plots.
    cmap = make_diverging_cmap(Theme.PRIMARY, Theme.SECONDARY)
    shap_values, _, feature_names = shap_global(
        pipeline,
        X,
        y,
        sample_size=max_samples,
        cmap=cmap,
    )
    # shap_global signals failure by returning None values.
    if shap_values is None or feature_names is None:
        logger.warning("Impossible de générer les résumés SHAP.")
        return None, None

    shap_array = _shape_array(shap_values)
    # A single-feature explanation comes back 1-D; reshape to (n_samples, 1).
    if shap_array.ndim == 1:
        shap_array = shap_array.reshape(-1, 1)
    mean_abs = np.abs(shap_array).mean(axis=0)
    summary = (
        pd.DataFrame({"feature": list(feature_names), "mean_abs_shap": mean_abs})
        .sort_values("mean_abs_shap", ascending=False)
        .reset_index(drop=True)
    )
    return summary, shap_values
66
+
67
+
68
def save_shap_summary(summary: pd.DataFrame, output_path: Path) -> None:
    """Persist the tabular SHAP importances as CSV, creating parent folders."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    summary.to_csv(output_path, index=False)
    logger.info("Résumé SHAP sauvegardé dans {}", output_path)
72
+
73
+
74
def export_local_explanations(
    pipeline,
    shap_values,
    X: pd.DataFrame,
    custom_index: int | None = None,
) -> None:
    """
    Generate three default use cases (max impact, max risk, min risk)
    plus an optional custom index for the historical trace.
    """
    # Nothing to explain when the global SHAP step failed upstream.
    if shap_values is None:
        return

    shap_array = _shape_array(shap_values)
    # Observation with the largest total absolute SHAP contribution.
    idx_impact = int(np.argmax(np.sum(np.abs(shap_array), axis=1)))
    shap_local(idx_impact, shap_values)

    # Highest predicted probability for the positive class (column 1).
    y_proba_all = pipeline.predict_proba(X)[:, 1]
    idx_highrisk = int(np.argmax(y_proba_all))
    shap_local(idx_highrisk, shap_values)

    # Lowest predicted probability, rendered with smaller text.
    idx_lowrisk = int(np.argmin(y_proba_all))
    shap_local(idx_lowrisk, shap_values, text_scale=0.6)

    # Optional extra observation requested by the caller.
    if custom_index is not None:
        shap_local(custom_index, shap_values, max_display=8)
100
+
101
+
102
+ __all__ = ["compute_shap_summary", "save_shap_summary", "export_local_explanations"]
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/features.py CHANGED
@@ -1,28 +1,170 @@
 
 
 
 
1
  from pathlib import Path
2
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
- from projet_05.config import PROCESSED_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- input_path: Path = PROCESSED_DATA_DIR / "dataset.csv",
16
- output_path: Path = PROCESSED_DATA_DIR / "features.csv",
17
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ):
19
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
20
- logger.info("Generating features from dataset...")
21
- for i in tqdm(range(10), total=10):
22
- if i == 5:
23
- logger.info("Something happened for iteration 5.")
24
- logger.success("Features generation complete.")
25
- # -----------------------------------------
26
 
27
 
28
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime
5
  from pathlib import Path
6
 
7
+ import numpy as np
8
+ import pandas as pd
9
  from loguru import logger
 
10
  import typer
11
 
12
+ from projet_05.config import INTERIM_DATA_DIR, PROCESSED_DATA_DIR
13
+ from projet_05.settings import Settings, load_settings
14
+
15
+ app = typer.Typer(help="Génération des features et nettoyage de la cible.")
16
+
17
# Canonical mapping from raw target spellings (French/English booleans and
# flags, lowercased before lookup) onto the binary attrition label:
# 1 = employee leaves, 0 = employee stays.
TARGET_MAPPING = {
    "1": 1,
    "0": 0,
    "oui": 1,
    "non": 0,
    "true": 1,
    "false": 0,
    "quitte": 1,
    "reste": 0,
    "yes": 1,
    "no": 0,
}
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Utilitaires cœur de pipeline
33
+ # ---------------------------------------------------------------------------
34
def _load_merged_dataset(path: Path) -> pd.DataFrame:
    """Load the merged CSV produced by dataset.py, failing fast when absent."""
    if not path.exists():
        raise FileNotFoundError(
            f"Le fichier fusionné {path} est introuvable. Lancez `python projet_05/dataset.py` d'abord."
        )
    logger.info("Chargement du dataset fusionné depuis {}", path)
    return pd.read_csv(path)
41
+
42
+
43
def _normalize_target(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
    """Map the raw target column onto {0, 1} and drop unmappable rows."""
    if settings.target not in df.columns:
        raise KeyError(f"La variable cible '{settings.target}' est absente du fichier.")

    cleaned = df.copy()
    # Lowercase/trim raw spellings before looking them up in TARGET_MAPPING;
    # anything unknown maps to NaN and is filtered out below.
    cleaned[settings.target] = (
        cleaned[settings.target].astype(str).str.strip().str.lower().map(TARGET_MAPPING)
    )
    rows_before = len(cleaned)
    cleaned = cleaned[cleaned[settings.target].isin([0, 1])].copy()
    removed = rows_before - len(cleaned)
    if removed:
        logger.warning("Suppression de {} lignes avec une cible invalide.", removed)
    cleaned[settings.target] = cleaned[settings.target].astype(int)
    return cleaned
63
+
64
+
65
def _safe_ratio(df: pd.DataFrame, numerator: str, denominator: str, output: str) -> None:
    """Add df[output] = numerator / denominator in place; zero denominators yield NaN."""
    if numerator in df.columns and denominator in df.columns:
        df[output] = df[numerator] / df[denominator].replace({0: np.nan})
70
+
71
+
72
def _engineer_features(df: pd.DataFrame, settings: Settings) -> pd.DataFrame:
    """Derive ratio and aggregate features on a copy of *df*."""
    engineered = df.copy()

    # NOTE: "augementation" (sic) matches the column name in the raw data.
    col = "augementation_salaire_precedente"
    if col in engineered:
        # Turn "12,5 %"-style strings into a 0-1 float fraction.
        engineered[col] = (
            engineered[col]
            .astype(str)
            .str.replace("%", "", regex=False)
            .str.replace(",", ".", regex=False)
            .str.strip()
        )
        engineered[col] = pd.to_numeric(engineered[col], errors="coerce") / 100

    # Ratio features; _safe_ratio is a no-op when a column is missing
    # and produces NaN for zero denominators.
    _safe_ratio(engineered, "augementation_salaire_precedente", "revenu_mensuel", "augmentation_par_revenu")
    _safe_ratio(engineered, "annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
    _safe_ratio(engineered, "nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
    _safe_ratio(
        engineered, "annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience"
    )

    # Mean satisfaction over whichever configured survey columns are present.
    if settings.sat_cols:
        existing = [col for col in settings.sat_cols if col in engineered.columns]
        if existing:
            engineered["score_moyen_satisfaction"] = engineered[existing].mean(axis=1)

    # Evaluation trend between the two most recent reviews.
    if "note_evaluation_actuelle" in engineered.columns and "note_evaluation_precedente" in engineered.columns:
        engineered["evolution_note"] = (
            engineered["note_evaluation_actuelle"] - engineered["note_evaluation_precedente"]
        )

    return engineered
104
+
105
+
106
def build_features(settings: Settings, *, input_path: Path) -> pd.DataFrame:
    """Load the merged dataset, normalize its target, then engineer features."""
    frame = _load_merged_dataset(input_path)
    frame = _normalize_target(frame, settings)
    return _engineer_features(frame, settings)
111
+
112
+
113
def save_features(df: pd.DataFrame, output_path: Path) -> None:
    """Write the enriched dataset to CSV, creating parent directories first."""
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    logger.success("Dataset enrichi sauvegardé dans {}", output_path)
117
+
118
+
119
def save_schema(settings: Settings, output_path: Path) -> None:
    """Serialize the feature schema (target, id column, feature groups) to JSON."""
    # Local import: the module only imports `datetime` from datetime.
    from datetime import timezone

    schema = {
        "target": settings.target,
        "col_id": settings.col_id,
        "numerical_features": list(settings.num_cols),
        "categorical_features": list(settings.cat_cols),
        "satisfaction_features": list(settings.sat_cols),
        # datetime.utcnow() is deprecated since Python 3.12 and returns a
        # naive datetime; record an explicit aware UTC timestamp instead.
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(schema, indent=2), encoding="utf-8")
    logger.info("Schéma sauvegardé dans {}", output_path)
131
+
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # CLI
135
+ # ---------------------------------------------------------------------------
136
@app.command()
def main(
    settings_path: Path = typer.Option(
        None,
        "--settings",
        "-s",
        help="Chemin optionnel vers un fichier settings.yml personnalisé.",
    ),
    input_path: Path = typer.Option(
        INTERIM_DATA_DIR / "merged.csv",
        "--input",
        "-i",
        help="Chemin du fichier issu de la fusion.",
    ),
    output_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "dataset.csv",
        "--output",
        "-o",
        help="Chemin du fichier enrichi.",
    ),
    schema_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "schema.json",
        "--schema",
        help="Chemin de sauvegarde du schéma de features.",
    ),
):
    """Typer pipeline preparing the enriched dataset (features + schema)."""

    # Fall back to the default settings.yml when no override is supplied.
    settings = load_settings(settings_path) if settings_path else load_settings()
    df = build_features(settings, input_path=input_path)
    save_features(df, output_path)
    save_schema(settings, schema_path)
 
168
 
169
 
170
  if __name__ == "__main__":
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/predict.py CHANGED
@@ -1,29 +1,99 @@
 
 
 
1
  from pathlib import Path
2
 
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
5
  import typer
6
 
7
  from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
8
 
9
- app = typer.Typer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- features_path: Path = PROCESSED_DATA_DIR / "test_features.csv",
16
- model_path: Path = MODELS_DIR / "model.pkl",
17
- predictions_path: Path = PROCESSED_DATA_DIR / "test_predictions.csv",
18
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ):
20
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
- logger.info("Performing inference for model...")
22
- for i in tqdm(range(10), total=10):
23
- if i == 5:
24
- logger.info("Something happened for iteration 5.")
25
- logger.success("Inference complete.")
26
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
  from pathlib import Path
5
 
6
+ import numpy as np
7
+ import pandas as pd
8
+ from joblib import load
9
  from loguru import logger
 
10
  import typer
11
 
12
  from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
13
 
14
+ app = typer.Typer(help="Inférence à partir du pipeline entraîné.")
15
+
16
+
17
def load_pipeline(model_path: Path):
    """Deserialize and return the trained pipeline stored at *model_path*.

    Raises FileNotFoundError with an explicit message when the artifact is
    missing, instead of surfacing joblib's own stack trace.
    """
    if model_path.exists():
        logger.info("Chargement du modèle {}", model_path)
        return load(model_path)
    raise FileNotFoundError(f"Modèle introuvable: {model_path}")
22
+
23
+
24
def load_metadata(metadata_path: Path) -> dict:
    """Read and parse the JSON metadata file produced by training."""
    try:
        raw = metadata_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Re-raise with the project's explicit French error message.
        raise FileNotFoundError(f"Fichier métadonnées introuvable: {metadata_path}") from None
    return json.loads(raw)
28
+
29
+
30
+ def run_inference(
31
+ df: pd.DataFrame,
32
+ pipeline,
33
+ threshold: float,
34
+ drop_columns: list[str] | None = None,
35
+ required_features: list[str] | None = None,
36
+ ) -> pd.DataFrame:
37
+ features = df.drop(columns=drop_columns or [], errors="ignore")
38
+ if required_features:
39
+ for col in required_features:
40
+ if col not in features.columns:
41
+ features[col] = np.nan
42
+ features = features[required_features]
43
+ proba = pipeline.predict_proba(features)[:, 1]
44
+ predictions = (proba >= threshold).astype(int)
45
+ output = df.copy()
46
+ output["proba_depart"] = proba
47
+ output["prediction"] = predictions
48
+ return output
49
 
50
 
51
@app.command()
def main(
    model_path: Path = typer.Option(
        MODELS_DIR / "best_model.joblib",
        "--model-path",
        help="Pipeline entraîné sauvegardé via train.py",
    ),
    metadata_path: Path = typer.Option(
        MODELS_DIR / "best_model_meta.json",
        "--metadata-path",
        help="Fichier JSON contenant le seuil optimal.",
    ),
    features_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "dataset.csv",
        "--features",
        "-f",
        help="Jeu de features sur lequel produire des prédictions.",
    ),
    predictions_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "predictions.csv",
        "--output",
        "-o",
        help="Chemin de sauvegarde des prédictions.",
    ),
):
    """Entrypoint Typer pour générer un fichier de prédictions."""
    # Load both artifacts up front so a missing model/metadata fails fast.
    pipeline = load_pipeline(model_path)
    metadata = load_metadata(metadata_path)

    frame = pd.read_csv(features_path)
    logger.info("Dataset chargé: {} lignes", len(frame))

    target_col = metadata.get("target")
    feature_cfg = metadata.get("features", {})
    expected = (feature_cfg.get("numerical") or []) + (feature_cfg.get("categorical") or [])

    scored = run_inference(
        frame,
        pipeline,
        metadata.get("best_threshold", 0.5),
        drop_columns=[target_col] if target_col else None,
        required_features=expected or None,
    )
    predictions_path.parent.mkdir(parents=True, exist_ok=True)
    scored.to_csv(predictions_path, index=False)
    logger.success("Prédictions sauvegardées dans {}", predictions_path)
97
 
98
 
99
  if __name__ == "__main__":
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/modeling/train.py CHANGED
@@ -1,29 +1,342 @@
 
 
 
 
1
  from pathlib import Path
 
2
 
 
 
 
 
 
3
  from loguru import logger
4
- from tqdm import tqdm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import typer
6
 
7
- from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- app = typer.Typer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  @app.command()
13
  def main(
14
- # ---- REPLACE DEFAULT PATHS AS APPROPRIATE ----
15
- features_path: Path = PROCESSED_DATA_DIR / "features.csv",
16
- labels_path: Path = PROCESSED_DATA_DIR / "labels.csv",
17
- model_path: Path = MODELS_DIR / "model.pkl",
18
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ):
20
- # ---- REPLACE THIS WITH YOUR OWN CODE ----
21
- logger.info("Training some model...")
22
- for i in tqdm(range(10), total=10):
23
- if i == 5:
24
- logger.info("Something happened for iteration 5.")
25
- logger.success("Modeling training complete.")
26
- # -----------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  if __name__ == "__main__":
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
  from pathlib import Path
6
+ from typing import Dict, Tuple
7
 
8
+ import numpy as np
9
+ import pandas as pd
10
+ from imblearn.over_sampling import SMOTE
11
+ from imblearn.pipeline import Pipeline as ImbPipeline
12
+ from joblib import dump
13
  from loguru import logger
14
+ from sklearn.base import clone
15
+ from sklearn.compose import ColumnTransformer
16
+ from sklearn.ensemble import RandomForestClassifier
17
+ from sklearn.impute import SimpleImputer
18
+ from sklearn.linear_model import LogisticRegression
19
+ from sklearn.metrics import (
20
+ f1_score,
21
+ precision_recall_curve,
22
+ precision_score,
23
+ recall_score,
24
+ roc_auc_score,
25
+ )
26
+ from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_predict
27
+ from sklearn.pipeline import Pipeline
28
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
29
  import typer
30
 
31
+ from projet_05.config import MODELS_DIR, PROCESSED_DATA_DIR, REPORTS_DIR
32
+ from projet_05.explainability import (
33
+ compute_shap_summary,
34
+ export_local_explanations,
35
+ save_shap_summary,
36
+ )
37
+ from projet_05.settings import Settings, load_settings
38
+
39
+ app = typer.Typer(help="Entraînement et sélection du meilleur modèle.")
40
+
41
+
42
+ def _clean_values(payload: dict) -> dict:
43
+ def _convert(value):
44
+ if isinstance(value, (np.floating, np.integer)):
45
+ return value.item()
46
+ return value
47
+
48
+ return {key: _convert(value) for key, value in payload.items()}
49
+
50
+
51
@dataclass
class ModelResult:
    """Outcome of one grid-searched candidate model.

    Populated by ``evaluate_models``:
      name           -- human-readable model identifier.
      best_estimator -- fitted imblearn pipeline from the grid search.
      best_params    -- winning hyper-parameter combination.
      best_threshold -- F1-optimal probability cut-off.
      metrics        -- cross-validated scores (f1/recall/precision/roc_auc).
    """

    name: str
    best_estimator: ImbPipeline
    best_params: dict
    best_threshold: float
    metrics: Dict[str, float]
58
+
59
+
60
def load_processed_dataset(path: Path) -> pd.DataFrame:
    """Read the enriched dataset CSV, failing loudly if it was never built."""
    if path.exists():
        logger.info("Chargement du dataset préparé depuis {}", path)
        return pd.read_csv(path)
    raise FileNotFoundError(
        f"Dataset traité introuvable ({path}). Lancez `python projet_05/features.py`."
    )
67
+
68
+
69
def split_features_target(df: pd.DataFrame, settings: Settings) -> Tuple[pd.DataFrame, pd.Series]:
    """Split *df* into a feature matrix X and the binary target vector y.

    The identifier column (when present) is removed from X together with
    the target. Raises KeyError if the configured target is missing.
    """
    if settings.target not in df.columns:
        raise KeyError(f"La cible {settings.target} est absente du dataset.")
    to_drop = [settings.target]
    if settings.col_id in df.columns:
        to_drop.append(settings.col_id)
    return df.drop(columns=to_drop, errors="ignore"), df[settings.target].astype(int)
78
+
79
+
80
def build_preprocessor(settings: Settings, X: pd.DataFrame) -> ColumnTransformer:
    """Assemble the ColumnTransformer for numeric and categorical features.

    Columns listed in the settings are used when present in X; when a list
    is empty we fall back to dtype-based detection so the pipeline still
    runs on datasets with a different column set.
    """
    present = set(X.columns)
    num_feats = [col for col in settings.num_cols if col in present]
    cat_feats = [col for col in settings.cat_cols if col in present]
    if not num_feats:
        num_feats = X.select_dtypes(include="number").columns.tolist()
    if not cat_feats:
        cat_feats = X.select_dtypes(exclude="number").columns.tolist()

    transformers = []
    if num_feats:
        # Median imputation is robust to outliers; scaling helps LogReg.
        num_pipe = Pipeline(
            steps=[
                ("imputer", SimpleImputer(strategy="median")),
                ("scaler", StandardScaler()),
            ]
        )
        transformers.append(("num", num_pipe, num_feats))
    if cat_feats:
        # handle_unknown="ignore" keeps inference from crashing on unseen
        # categories; dense output so SMOTE can consume it downstream.
        cat_pipe = Pipeline(
            steps=[
                ("imputer", SimpleImputer(strategy="most_frequent")),
                ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
            ]
        )
        transformers.append(("cat", cat_pipe, cat_feats))
    if not transformers:
        raise ValueError("Aucune feature disponible pour l'entraînement.")
    return ColumnTransformer(transformers=transformers)
108
+
109
+
110
def get_models(random_state: int):
    """Return the candidate estimators with their hyper-parameter grids.

    Maps model name -> (estimator, grid); grid keys target the "clf" step
    of the training pipeline built in ``evaluate_models``.
    """
    logreg = LogisticRegression(
        max_iter=2000,
        class_weight="balanced",
        random_state=random_state,
    )
    # Two sub-grids because solver/penalty combinations are constrained:
    # lbfgs only supports l2, liblinear supports l1 and l2.
    logreg_grid = [
        {
            "clf__solver": ["lbfgs"],
            "clf__penalty": ["l2"],
            "clf__C": [0.1, 1.0, 10.0],
        },
        {
            "clf__solver": ["liblinear"],
            "clf__penalty": ["l1", "l2"],
            "clf__C": [0.1, 1.0, 10.0],
        },
    ]
    forest = RandomForestClassifier(
        n_estimators=300,
        max_depth=8,
        min_samples_split=10,
        min_samples_leaf=5,
        class_weight="balanced_subsample",
        random_state=random_state,
    )
    forest_grid = {
        "clf__n_estimators": [200, 300, 500],
        "clf__max_depth": [6, 8, 10],
        "clf__min_samples_split": [5, 10, 15],
        "clf__min_samples_leaf": [2, 5, 8],
    }
    return {
        "LogReg_balanced": (logreg, logreg_grid),
        "RF_balanced": (forest, forest_grid),
    }
148
+
149
 
150
def _compute_best_threshold(y_true, y_proba):
    """Pick the decision threshold that maximizes F1 along the PR curve.

    Returns 0.5 when the curve yields no usable threshold (degenerate
    input). The epsilon avoids division by zero where precision and recall
    are both zero.
    """
    precision, recall, thresholds = precision_recall_curve(y_true, y_proba)
    # Guard BEFORE indexing: the original only checked emptiness after
    # calling nanargmax, so the fallback sat after the work it protected.
    if thresholds.size == 0:
        return 0.5
    f1_scores = 2 * (precision * recall) / (precision + recall + 1e-8)
    best_idx = np.nanargmax(f1_scores)
    # precision/recall have one more entry than thresholds; clamp so the
    # final (threshold-less) PR point never indexes out of range.
    best_idx = min(best_idx, thresholds.size - 1)
    return float(thresholds[best_idx])
158
+
159
+
160
def evaluate_models(X, y, settings: Settings, preprocessor: ColumnTransformer) -> list[ModelResult]:
    """Grid-search each candidate model and score it with cross-validation.

    For every model returned by ``get_models`` this:
      1. builds an imblearn pipeline (preprocessing -> SMOTE -> classifier),
      2. grid-searches hyper-parameters with 5-fold stratified CV on F1,
      3. derives an F1-optimal decision threshold from out-of-fold
         probabilities, and
      4. records F1 / recall / precision / ROC-AUC at that threshold.

    Returns one ModelResult per candidate.
    """
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=settings.random_state)
    results: list[ModelResult] = []

    for name, (model, grid) in get_models(settings.random_state).items():
        logger.info("Entraînement du modèle {}", name)
        # SMOTE sits inside the CV pipeline so oversampling only ever sees
        # training folds (no leakage into validation folds).
        pipe = ImbPipeline(
            steps=[
                ("prep", preprocessor),
                ("smote", SMOTE(random_state=settings.random_state)),
                ("clf", model),
            ]
        )
        search = GridSearchCV(
            estimator=pipe,
            param_grid=grid,
            cv=cv,
            scoring="f1",
            n_jobs=-1,
        )
        search.fit(X, y)
        best_pipe = search.best_estimator_

        # Out-of-fold probabilities for threshold tuning. NOTE(review): this
        # refits best_pipe per fold with the same `cv` used for the search,
        # so the threshold is tuned on data the hyper-parameters saw.
        y_proba = cross_val_predict(best_pipe, X, y, cv=cv, method="predict_proba")[:, 1]
        threshold = _compute_best_threshold(y, y_proba)
        y_pred = (y_proba >= threshold).astype(int)

        metrics = {
            "f1": f1_score(y, y_pred),
            "recall": recall_score(y, y_pred),
            "precision": precision_score(y, y_pred),
            "roc_auc": roc_auc_score(y, y_proba),
        }
        logger.info("Scores {} -> {}", name, metrics)
        results.append(
            ModelResult(
                name=name,
                best_estimator=best_pipe,
                best_params=search.best_params_,
                best_threshold=threshold,
                metrics=metrics,
            )
        )
    return results
204
+
205
+
206
def compute_dummy_baseline(y: pd.Series) -> dict:
    """Score a constant majority-class predictor as a sanity baseline."""
    majority = int(y.mode().iloc[0])
    constant_pred = np.full_like(y, fill_value=majority)
    return {
        "strategy": "most_frequent",
        "majority_class": majority,
        "f1": f1_score(y, constant_pred),
        "recall": recall_score(y, constant_pred),
        "precision": precision_score(y, constant_pred, zero_division=0),
        # A constant classifier has no ranking power by construction.
        "roc_auc": 0.5,
    }
217
+
218
+
219
def fit_final_pipeline(
    best_result: ModelResult,
    X: pd.DataFrame,
    y: pd.Series,
    settings: Settings,
):
    """Refit the winning model on the full dataset and return the pipeline.

    Bug fix: the previous version ran ``SMOTE.fit_resample`` directly on
    the RAW feature frame, before any preprocessing. SMOTE requires an
    all-numeric matrix, so that crashed whenever categorical columns were
    present — and even on purely numeric data it oversampled unimputed,
    unscaled values, unlike the cross-validation setup. We now mirror
    evaluation exactly: preprocessing -> SMOTE -> classifier inside one
    imblearn pipeline. Samplers only run at fit time in imblearn pipelines,
    so inference via predict_proba is unaffected.
    """
    final_pipe = ImbPipeline(
        steps=[
            ("prep", build_preprocessor(settings, X)),
            ("smote", SMOTE(random_state=settings.random_state)),
            # clone keeps the grid-searched hyper-parameters but resets
            # the fitted state before the full-data refit.
            ("clf", clone(best_result.best_estimator.named_steps["clf"])),
        ]
    )
    final_pipe.fit(X, y)
    logger.success(
        "Modèle {} ré-entraîné sur {} lignes équilibrées.", best_result.name, len(X)
    )
    return final_pipe
240
+
241
+
242
def save_artifacts(
    pipeline: Pipeline,
    results: list[ModelResult],
    best_result: ModelResult,
    baseline: dict,
    settings: Settings,
    model_path: Path,
    metadata_path: Path,
    shap_path: Path,
    X: pd.DataFrame,
    y: pd.Series,
):
    """Persist the trained pipeline, run metadata and SHAP explanations.

    Writes three artifacts:
      * ``model_path``    -- the fitted pipeline (joblib dump).
      * ``metadata_path`` -- JSON with the winner, its threshold, per-model
        scores, the dummy baseline and the feature schema; this file is the
        contract consumed by predict.py.
      * ``shap_path``     -- SHAP importance summary, when the
        explainability helpers return one.
    """
    model_path.parent.mkdir(parents=True, exist_ok=True)
    dump(pipeline, model_path)
    logger.success("Pipeline sauvegardé dans {}", model_path)

    # NumPy scalars are passed through _clean_values so json.dumps succeeds.
    metadata = {
        "best_model": best_result.name,
        "best_threshold": float(best_result.best_threshold),
        "best_params": best_result.best_params,
        "metrics": _clean_values(best_result.metrics),
        "all_results": [
            {
                "model": r.name,
                "metrics": _clean_values(r.metrics),
                "best_threshold": float(r.best_threshold),
                "best_params": r.best_params,
            }
            for r in results
        ],
        "baseline": _clean_values(baseline),
        "features": {
            "numerical": list(settings.num_cols),
            "categorical": list(settings.cat_cols),
        },
        "target": settings.target,
    }
    metadata_path.parent.mkdir(parents=True, exist_ok=True)
    metadata_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    logger.info("Métadonnées sauvegardées dans {}", metadata_path)

    # SHAP export is best-effort: a None summary means it was skipped.
    shap_summary, shap_values = compute_shap_summary(pipeline, X, y)
    if shap_summary is not None:
        save_shap_summary(shap_summary, shap_path)
        export_local_explanations(pipeline, shap_values, X)
287
 
288
 
289
@app.command()
def main(
    settings_path: Path = typer.Option(None, "--settings", "-s", help="Chemin alternatif vers settings.yml."),
    input_path: Path = typer.Option(
        PROCESSED_DATA_DIR / "dataset.csv",
        "--input",
        "-i",
        help="Dataset enrichi issu de projet_05/features.py",
    ),
    model_path: Path = typer.Option(
        MODELS_DIR / "best_model.joblib",
        "--model-path",
        help="Chemin de sauvegarde du pipeline entraîné.",
    ),
    metadata_path: Path = typer.Option(
        MODELS_DIR / "best_model_meta.json",
        "--metadata-path",
        help="Chemin de sauvegarde des métriques et métadonnées.",
    ),
    shap_path: Path = typer.Option(
        REPORTS_DIR / "shap_summary.csv",
        "--shap-path",
        help="Chemin de sortie du résumé SHAP.",
    ),
):
    """Script principal pour lancer l'entraînement complet."""
    settings = load_settings(settings_path) if settings_path else load_settings()

    # Data preparation.
    frame = load_processed_dataset(input_path)
    X, y = split_features_target(frame, settings)

    # Candidate evaluation and winner selection (highest cross-validated F1).
    candidates = evaluate_models(X, y, settings, build_preprocessor(settings, X))
    if not candidates:
        raise RuntimeError("Aucun modèle évalué. Vérifiez la configuration.")
    winner = max(candidates, key=lambda r: r.metrics["f1"])
    baseline = compute_dummy_baseline(y)
    logger.info("Baseline Dummy -> {}", baseline)

    # Final refit on the full dataset, then persist every artifact.
    final_pipeline = fit_final_pipeline(winner, X, y, settings)
    save_artifacts(
        final_pipeline,
        candidates,
        winner,
        baseline,
        settings,
        model_path,
        metadata_path,
        shap_path,
        X,
        y,
    )
340
 
341
 
342
  if __name__ == "__main__":
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/settings.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from functools import lru_cache
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Iterable
8
+
9
+ import yaml
10
+
11
+ DEFAULT_SETTINGS_PATH = Path(__file__).with_name("settings.yml")
12
+
13
+
14
@dataclass(frozen=True)
class Settings:
    """Immutable project configuration loaded from settings.yml.

    Path fields use ``default_factory`` so each instance gets its own Path
    object instead of sharing one class-level default.
    """

    random_state: int = 42
    path_sirh: Path = field(default_factory=lambda: Path("data/raw/sirh.csv"))
    path_eval: Path = field(default_factory=lambda: Path("data/raw/evaluation.csv"))
    path_sondage: Path = field(default_factory=lambda: Path("data/raw/sondage.csv"))
    col_id: str = "id_employee"
    target: str = "a_quitte_l_entreprise"
    num_cols: tuple[str, ...] = ()
    cat_cols: tuple[str, ...] = ()
    sat_cols: tuple[str, ...] = ()
    first_vars: tuple[str, ...] = ()
    subsample_frac: float = 1.0
    sql_file: Path = field(default_factory=lambda: Path("merge_sql.sql"))
    db_file: Path = field(default_factory=lambda: Path("merge_temp.db"))

    def as_dict(self) -> dict:
        """Return a serializable representation (useful for logging/tests).

        Paths are rendered as strings and tuples as lists so the result is
        JSON/YAML friendly.
        """
        return {
            "random_state": self.random_state,
            "path_sirh": str(self.path_sirh),
            "path_eval": str(self.path_eval),
            "path_sondage": str(self.path_sondage),
            "col_id": self.col_id,
            "target": self.target,
            "num_cols": list(self.num_cols),
            "cat_cols": list(self.cat_cols),
            "sat_cols": list(self.sat_cols),
            "first_vars": list(self.first_vars),
            "subsample_frac": self.subsample_frac,
            "sql_file": str(self.sql_file),
            "db_file": str(self.db_file),
        }
47
+
48
+
49
+ def _ensure_iterable(values: Iterable[str] | None, *, field_name: str) -> tuple[str, ...]:
50
+ if values is None:
51
+ return ()
52
+ if isinstance(values, str):
53
+ msg = f"'{field_name}' doit être une liste et non une chaîne isolée."
54
+ raise TypeError(msg)
55
+ return tuple(v for v in values if v)
56
+
57
+
58
+ def _resolve_path(candidate: str | os.PathLike[str] | None, *, base_dir: Path) -> Path:
59
+ if not candidate:
60
+ raise ValueError("Aucun chemin n'a été fourni dans le fichier de configuration.")
61
+ resolved = Path(candidate)
62
+ if not resolved.is_absolute():
63
+ resolved = (base_dir / resolved).resolve()
64
+ return resolved
65
+
66
+
67
def _load_raw_settings(path: Path) -> dict:
    """Parse the YAML config file and ensure its top level is a mapping."""
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    if not isinstance(data, dict):
        raise ValueError(f"Le fichier de configuration {path} doit contenir un dictionnaire YAML.")
    return data
73
+
74
+
75
@lru_cache
def load_settings(custom_path: str | os.PathLike[str] | None = None) -> Settings:
    """Load the project configuration from a YAML file.

    Search order:
      1. the ``custom_path`` argument when given,
      2. the ``PROJET05_SETTINGS`` environment variable,
      3. the default file ``projet_05/settings.yml``.

    Raises FileNotFoundError when no config file exists at the resolved
    location.
    """
    # NOTE(review): results are lru_cached per custom_path, so changes to
    # PROJET05_SETTINGS (or the file) are not picked up within a process.
    env_path = os.environ.get("PROJET05_SETTINGS")
    raw_path = Path(custom_path or env_path or DEFAULT_SETTINGS_PATH)

    if not raw_path.exists():
        raise FileNotFoundError(
            f"Fichier de configuration introuvable : {raw_path}. "
            "Initialisez-le depuis projet_05/settings.yml ou indiquez PROJET05_SETTINGS."
        )

    base_dir = raw_path.parent
    payload = _load_raw_settings(raw_path)
    paths_block = payload.get("paths", {})
    # One defaults instance instead of a throwaway Settings() per fallback
    # (the original instantiated five of them).
    defaults = Settings()

    return Settings(
        random_state=int(payload.get("random_state", defaults.random_state)),
        path_sirh=_resolve_path(paths_block.get("sirh", defaults.path_sirh), base_dir=base_dir),
        path_eval=_resolve_path(paths_block.get("evaluation", defaults.path_eval), base_dir=base_dir),
        path_sondage=_resolve_path(paths_block.get("sondage", defaults.path_sondage), base_dir=base_dir),
        col_id=payload.get("col_id", defaults.col_id),
        target=payload.get("target", defaults.target),
        num_cols=_ensure_iterable(payload.get("num_cols"), field_name="num_cols"),
        cat_cols=_ensure_iterable(payload.get("cat_cols"), field_name="cat_cols"),
        sat_cols=_ensure_iterable(payload.get("sat_cols"), field_name="sat_cols"),
        first_vars=_ensure_iterable(payload.get("first_vars"), field_name="first_vars"),
        subsample_frac=float(payload.get("subsample_frac", defaults.subsample_frac)),
        sql_file=_resolve_path(paths_block.get("sql_file", defaults.sql_file), base_dir=base_dir),
        db_file=_resolve_path(paths_block.get("db_file", defaults.db_file), base_dir=base_dir),
    )
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/projet_05/settings.yml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ random_state: 42
2
+ col_id: id_employee
3
+ target: a_quitte_l_entreprise
4
+ subsample_frac: 0.5
5
+
6
+ paths:
7
+ sirh: ../data/raw/sirh.csv
8
+ evaluation: ../data/raw/evaluation.csv
9
+ sondage: ../data/raw/sondage.csv
10
+ sql_file: ../reports/merge_sql.sql
11
+ db_file: ../data/interim/merge_temp.db
12
+
13
+ num_cols:
14
+ - age
15
+ - revenu_mensuel
16
+ - annees_dans_l_entreprise
17
+ - annees_dans_le_poste_actuel
18
+ - annees_depuis_la_derniere_promotion
19
+ - distance_domicile_travail
20
+ - nombre_participation_pee
21
+ - note_evaluation_actuelle
22
+ - note_evaluation_precedente
23
+ - annees_depuis_le_changement_deposte
24
+ - annee_experience_totale
25
+ - nb_formations_suivies
26
+ - satisfaction_employee_environnement
27
+ - satisfaction_employee_nature_travail
28
+ - satisfaction_employee_equipe
29
+ - satisfaction_employee_equilibre_pro_perso
30
+ - augmentation_par_revenu
31
+ - annee_sur_poste_par_experience
32
+ - nb_formation_par_experience
33
+ - score_moyen_satisfaction
34
+ - dern_promo_par_experience
35
+ - evolution_note
36
+
37
+ cat_cols:
38
+ - genre
39
+ - departement
40
+ - frequence_deplacement
41
+ - etat_civil
42
+ - niveau_etudes
43
+ - role
44
+ - type_contrat
45
+
46
+ sat_cols:
47
+ - satisfaction_employee_environnement
48
+ - satisfaction_employee_nature_travail
49
+ - satisfaction_employee_equipe
50
+ - satisfaction_employee_equilibre_pro_perso
51
+
52
+ first_vars:
53
+ - age
54
+ - revenu_mensuel
55
+ - annees_dans_l_entreprise
56
+ - note_evaluation_actuelle