marintosti12 committed on
Commit
92a1fd3
·
1 Parent(s): 67092ff

final rework

Browse files
README.md CHANGED
@@ -52,6 +52,7 @@ Crée un fichier **.env** à la racine :
52
  DATABASE_URL=postgresql+psycopg2://futu:futu_pass@localhost:5432/futurisys
53
  # Hugging Face
54
  HF_TOKEN= Token Hugging Face
 
55
  ~~~
56
 
57
 
@@ -62,7 +63,6 @@ sudo docker compose up -d
62
  ~~~
63
 
64
  🗄️ Base de données
65
- ## 📊 Modèle de données
66
 
67
  ~~~mermaid
68
  classDiagram
@@ -178,12 +178,32 @@ classDiagram
178
  MLInput "1" --> "0..*" MLOutput
179
  ~~~
180
 
181
- ### 5. Lancer l’API
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  ~~~bash
184
  poetry run uvicorn main:app --reload --app-dir src
185
  ~~~
186
 
 
 
 
 
 
 
187
 
188
  ### 🧹 Qualité de code
189
 
 
52
  DATABASE_URL=postgresql+psycopg2://futu:futu_pass@localhost:5432/futurisys
53
  # Hugging Face
54
  HF_TOKEN= Token Hugging Face
55
+ HF_REPO_ID= Repo Hugging Face
56
  ~~~
57
 
58
 
 
63
  ~~~
64
 
65
  🗄️ Base de données
 
66
 
67
  ~~~mermaid
68
  classDiagram
 
178
  MLInput "1" --> "0..*" MLOutput
179
  ~~~
180
 
181
+ ### 5. Lancer les migrations
182
+
183
+ ~~~bash
184
+ export DATABASE_URL='postgresql+asyncpg://futu:futu_pass@localhost:5432/futurisys'
185
+ poetry run alembic upgrade head
186
+ ~~~
187
+
188
+ ### 6. Lancer le seeder
189
+
190
+ ~~~bash
191
+ export DATABASE_URL='postgresql+psycopg2://futu:futu_pass@localhost:5432/futurisys'
192
+ poetry run python src/seeds/ml_models_seed.py
193
+ ~~~
194
+
195
+ ### 7. Lancer l’API
196
 
197
  ~~~bash
198
  poetry run uvicorn main:app --reload --app-dir src
199
  ~~~
200
 
201
+ ### 8. Hugging Face
202
+
203
+ Pour générer les artefacts, exécuter les notebooks de machine learning.
204
+
205
+ Sur Hugging Face (Models), stocker les artefacts du modèle dans le dépôt du Space (models/) et nommer le fichier exactement comme le nom du modèle en base de données.
206
+
207
 
208
  ### 🧹 Qualité de code
209
 
notebook/1_Analyse.ipynb CHANGED
@@ -16,7 +16,7 @@
16
  },
17
  {
18
  "cell_type": "code",
19
- "execution_count": 7,
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
@@ -39,7 +39,7 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": 8,
43
  "metadata": {},
44
  "outputs": [],
45
  "source": [
@@ -49,7 +49,7 @@
49
  },
50
  {
51
  "cell_type": "code",
52
- "execution_count": 9,
53
  "metadata": {},
54
  "outputs": [
55
  {
@@ -82,7 +82,7 @@
82
  },
83
  {
84
  "cell_type": "code",
85
- "execution_count": 10,
86
  "metadata": {},
87
  "outputs": [
88
  {
@@ -118,7 +118,7 @@
118
  },
119
  {
120
  "cell_type": "code",
121
- "execution_count": 11,
122
  "metadata": {},
123
  "outputs": [
124
  {
@@ -161,7 +161,7 @@
161
  },
162
  {
163
  "cell_type": "code",
164
- "execution_count": 12,
165
  "metadata": {},
166
  "outputs": [
167
  {
@@ -226,7 +226,7 @@
226
  },
227
  {
228
  "cell_type": "code",
229
- "execution_count": 13,
230
  "metadata": {},
231
  "outputs": [
232
  {
@@ -246,7 +246,7 @@
246
  },
247
  {
248
  "cell_type": "code",
249
- "execution_count": 14,
250
  "metadata": {},
251
  "outputs": [
252
  {
@@ -267,7 +267,7 @@
267
  },
268
  {
269
  "cell_type": "code",
270
- "execution_count": 15,
271
  "metadata": {},
272
  "outputs": [
273
  {
@@ -392,7 +392,7 @@
392
  },
393
  {
394
  "cell_type": "code",
395
- "execution_count": 16,
396
  "metadata": {},
397
  "outputs": [],
398
  "source": [
@@ -422,7 +422,7 @@
422
  },
423
  {
424
  "cell_type": "code",
425
- "execution_count": 17,
426
  "metadata": {},
427
  "outputs": [
428
  {
@@ -457,7 +457,7 @@
457
  },
458
  {
459
  "cell_type": "code",
460
- "execution_count": 18,
461
  "metadata": {},
462
  "outputs": [
463
  {
@@ -487,7 +487,7 @@
487
  },
488
  {
489
  "cell_type": "code",
490
- "execution_count": 19,
491
  "metadata": {},
492
  "outputs": [
493
  {
@@ -522,7 +522,7 @@
522
  },
523
  {
524
  "cell_type": "code",
525
- "execution_count": 20,
526
  "metadata": {},
527
  "outputs": [
528
  {
@@ -543,7 +543,7 @@
543
  "Name: count, dtype: int64"
544
  ]
545
  },
546
- "execution_count": 20,
547
  "metadata": {},
548
  "output_type": "execute_result"
549
  }
@@ -571,7 +571,7 @@
571
  },
572
  {
573
  "cell_type": "code",
574
- "execution_count": 21,
575
  "metadata": {},
576
  "outputs": [
577
  {
@@ -606,7 +606,7 @@
606
  },
607
  {
608
  "cell_type": "code",
609
- "execution_count": 22,
610
  "metadata": {},
611
  "outputs": [
612
  {
@@ -643,7 +643,7 @@
643
  },
644
  {
645
  "cell_type": "code",
646
- "execution_count": 23,
647
  "metadata": {},
648
  "outputs": [
649
  {
@@ -678,7 +678,7 @@
678
  },
679
  {
680
  "cell_type": "code",
681
- "execution_count": 24,
682
  "metadata": {},
683
  "outputs": [
684
  {
@@ -727,7 +727,7 @@
727
  },
728
  {
729
  "cell_type": "code",
730
- "execution_count": 25,
731
  "metadata": {},
732
  "outputs": [
733
  {
@@ -762,7 +762,7 @@
762
  },
763
  {
764
  "cell_type": "code",
765
- "execution_count": 26,
766
  "metadata": {},
767
  "outputs": [
768
  {
@@ -811,7 +811,7 @@
811
  },
812
  {
813
  "cell_type": "code",
814
- "execution_count": 27,
815
  "metadata": {},
816
  "outputs": [
817
  {
@@ -847,7 +847,7 @@
847
  },
848
  {
849
  "cell_type": "code",
850
- "execution_count": 28,
851
  "metadata": {},
852
  "outputs": [
853
  {
@@ -889,7 +889,7 @@
889
  },
890
  {
891
  "cell_type": "code",
892
- "execution_count": 29,
893
  "metadata": {},
894
  "outputs": [
895
  {
@@ -938,7 +938,7 @@
938
  },
939
  {
940
  "cell_type": "code",
941
- "execution_count": 30,
942
  "metadata": {},
943
  "outputs": [
944
  {
@@ -984,7 +984,7 @@
984
  },
985
  {
986
  "cell_type": "code",
987
- "execution_count": 31,
988
  "metadata": {},
989
  "outputs": [
990
  {
@@ -1034,7 +1034,7 @@
1034
  },
1035
  {
1036
  "cell_type": "code",
1037
- "execution_count": 32,
1038
  "metadata": {},
1039
  "outputs": [
1040
  {
@@ -1069,7 +1069,7 @@
1069
  },
1070
  {
1071
  "cell_type": "code",
1072
- "execution_count": 33,
1073
  "metadata": {},
1074
  "outputs": [
1075
  {
@@ -1113,7 +1113,7 @@
1113
  },
1114
  {
1115
  "cell_type": "code",
1116
- "execution_count": 34,
1117
  "metadata": {},
1118
  "outputs": [
1119
  {
@@ -1159,7 +1159,7 @@
1159
  },
1160
  {
1161
  "cell_type": "code",
1162
- "execution_count": 35,
1163
  "metadata": {},
1164
  "outputs": [
1165
  {
@@ -1195,7 +1195,7 @@
1195
  },
1196
  {
1197
  "cell_type": "code",
1198
- "execution_count": 36,
1199
  "metadata": {},
1200
  "outputs": [
1201
  {
@@ -1232,7 +1232,7 @@
1232
  },
1233
  {
1234
  "cell_type": "code",
1235
- "execution_count": 37,
1236
  "metadata": {},
1237
  "outputs": [
1238
  {
@@ -1274,7 +1274,7 @@
1274
  },
1275
  {
1276
  "cell_type": "code",
1277
- "execution_count": 38,
1278
  "metadata": {},
1279
  "outputs": [
1280
  {
@@ -1313,7 +1313,7 @@
1313
  },
1314
  {
1315
  "cell_type": "code",
1316
- "execution_count": 39,
1317
  "metadata": {},
1318
  "outputs": [
1319
  {
@@ -1322,7 +1322,7 @@
1322
  "['df.joblib']"
1323
  ]
1324
  },
1325
- "execution_count": 39,
1326
  "metadata": {},
1327
  "output_type": "execute_result"
1328
  }
 
16
  },
17
  {
18
  "cell_type": "code",
19
+ "execution_count": 1,
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 2,
43
  "metadata": {},
44
  "outputs": [],
45
  "source": [
 
49
  },
50
  {
51
  "cell_type": "code",
52
+ "execution_count": 3,
53
  "metadata": {},
54
  "outputs": [
55
  {
 
82
  },
83
  {
84
  "cell_type": "code",
85
+ "execution_count": 4,
86
  "metadata": {},
87
  "outputs": [
88
  {
 
118
  },
119
  {
120
  "cell_type": "code",
121
+ "execution_count": 5,
122
  "metadata": {},
123
  "outputs": [
124
  {
 
161
  },
162
  {
163
  "cell_type": "code",
164
+ "execution_count": 6,
165
  "metadata": {},
166
  "outputs": [
167
  {
 
226
  },
227
  {
228
  "cell_type": "code",
229
+ "execution_count": 7,
230
  "metadata": {},
231
  "outputs": [
232
  {
 
246
  },
247
  {
248
  "cell_type": "code",
249
+ "execution_count": 8,
250
  "metadata": {},
251
  "outputs": [
252
  {
 
267
  },
268
  {
269
  "cell_type": "code",
270
+ "execution_count": 9,
271
  "metadata": {},
272
  "outputs": [
273
  {
 
392
  },
393
  {
394
  "cell_type": "code",
395
+ "execution_count": 10,
396
  "metadata": {},
397
  "outputs": [],
398
  "source": [
 
422
  },
423
  {
424
  "cell_type": "code",
425
+ "execution_count": 11,
426
  "metadata": {},
427
  "outputs": [
428
  {
 
457
  },
458
  {
459
  "cell_type": "code",
460
+ "execution_count": 12,
461
  "metadata": {},
462
  "outputs": [
463
  {
 
487
  },
488
  {
489
  "cell_type": "code",
490
+ "execution_count": 13,
491
  "metadata": {},
492
  "outputs": [
493
  {
 
522
  },
523
  {
524
  "cell_type": "code",
525
+ "execution_count": 14,
526
  "metadata": {},
527
  "outputs": [
528
  {
 
543
  "Name: count, dtype: int64"
544
  ]
545
  },
546
+ "execution_count": 14,
547
  "metadata": {},
548
  "output_type": "execute_result"
549
  }
 
571
  },
572
  {
573
  "cell_type": "code",
574
+ "execution_count": 15,
575
  "metadata": {},
576
  "outputs": [
577
  {
 
606
  },
607
  {
608
  "cell_type": "code",
609
+ "execution_count": 16,
610
  "metadata": {},
611
  "outputs": [
612
  {
 
643
  },
644
  {
645
  "cell_type": "code",
646
+ "execution_count": 17,
647
  "metadata": {},
648
  "outputs": [
649
  {
 
678
  },
679
  {
680
  "cell_type": "code",
681
+ "execution_count": 18,
682
  "metadata": {},
683
  "outputs": [
684
  {
 
727
  },
728
  {
729
  "cell_type": "code",
730
+ "execution_count": 19,
731
  "metadata": {},
732
  "outputs": [
733
  {
 
762
  },
763
  {
764
  "cell_type": "code",
765
+ "execution_count": 20,
766
  "metadata": {},
767
  "outputs": [
768
  {
 
811
  },
812
  {
813
  "cell_type": "code",
814
+ "execution_count": 21,
815
  "metadata": {},
816
  "outputs": [
817
  {
 
847
  },
848
  {
849
  "cell_type": "code",
850
+ "execution_count": 22,
851
  "metadata": {},
852
  "outputs": [
853
  {
 
889
  },
890
  {
891
  "cell_type": "code",
892
+ "execution_count": 23,
893
  "metadata": {},
894
  "outputs": [
895
  {
 
938
  },
939
  {
940
  "cell_type": "code",
941
+ "execution_count": 24,
942
  "metadata": {},
943
  "outputs": [
944
  {
 
984
  },
985
  {
986
  "cell_type": "code",
987
+ "execution_count": 25,
988
  "metadata": {},
989
  "outputs": [
990
  {
 
1034
  },
1035
  {
1036
  "cell_type": "code",
1037
+ "execution_count": 26,
1038
  "metadata": {},
1039
  "outputs": [
1040
  {
 
1069
  },
1070
  {
1071
  "cell_type": "code",
1072
+ "execution_count": 27,
1073
  "metadata": {},
1074
  "outputs": [
1075
  {
 
1113
  },
1114
  {
1115
  "cell_type": "code",
1116
+ "execution_count": 28,
1117
  "metadata": {},
1118
  "outputs": [
1119
  {
 
1159
  },
1160
  {
1161
  "cell_type": "code",
1162
+ "execution_count": 29,
1163
  "metadata": {},
1164
  "outputs": [
1165
  {
 
1195
  },
1196
  {
1197
  "cell_type": "code",
1198
+ "execution_count": 30,
1199
  "metadata": {},
1200
  "outputs": [
1201
  {
 
1232
  },
1233
  {
1234
  "cell_type": "code",
1235
+ "execution_count": 31,
1236
  "metadata": {},
1237
  "outputs": [
1238
  {
 
1274
  },
1275
  {
1276
  "cell_type": "code",
1277
+ "execution_count": 32,
1278
  "metadata": {},
1279
  "outputs": [
1280
  {
 
1313
  },
1314
  {
1315
  "cell_type": "code",
1316
+ "execution_count": 33,
1317
  "metadata": {},
1318
  "outputs": [
1319
  {
 
1322
  "['df.joblib']"
1323
  ]
1324
  },
1325
+ "execution_count": 33,
1326
  "metadata": {},
1327
  "output_type": "execute_result"
1328
  }
notebook/2_Nettoyage_Standardisation.ipynb CHANGED
@@ -16,7 +16,7 @@
16
  },
17
  {
18
  "cell_type": "code",
19
- "execution_count": 2,
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
@@ -30,7 +30,7 @@
30
  },
31
  {
32
  "cell_type": "code",
33
- "execution_count": 3,
34
  "metadata": {},
35
  "outputs": [
36
  {
@@ -92,7 +92,7 @@
92
  },
93
  {
94
  "cell_type": "code",
95
- "execution_count": 4,
96
  "metadata": {},
97
  "outputs": [
98
  {
@@ -146,7 +146,7 @@
146
  },
147
  {
148
  "cell_type": "code",
149
- "execution_count": 5,
150
  "metadata": {},
151
  "outputs": [
152
  {
@@ -169,7 +169,7 @@
169
  },
170
  {
171
  "cell_type": "code",
172
- "execution_count": 6,
173
  "metadata": {},
174
  "outputs": [],
175
  "source": [
@@ -196,7 +196,7 @@
196
  },
197
  {
198
  "cell_type": "code",
199
- "execution_count": 7,
200
  "metadata": {},
201
  "outputs": [],
202
  "source": [
@@ -214,7 +214,7 @@
214
  },
215
  {
216
  "cell_type": "code",
217
- "execution_count": 8,
218
  "metadata": {},
219
  "outputs": [
220
  {
@@ -242,7 +242,7 @@
242
  },
243
  {
244
  "cell_type": "code",
245
- "execution_count": 9,
246
  "metadata": {},
247
  "outputs": [
248
  {
@@ -303,7 +303,7 @@
303
  },
304
  {
305
  "cell_type": "code",
306
- "execution_count": 10,
307
  "metadata": {},
308
  "outputs": [
309
  {
@@ -364,7 +364,7 @@
364
  },
365
  {
366
  "cell_type": "code",
367
- "execution_count": 11,
368
  "metadata": {},
369
  "outputs": [
370
  {
@@ -425,7 +425,7 @@
425
  },
426
  {
427
  "cell_type": "code",
428
- "execution_count": 12,
429
  "metadata": {},
430
  "outputs": [
431
  {
@@ -434,7 +434,7 @@
434
  "['df.joblib']"
435
  ]
436
  },
437
- "execution_count": 12,
438
  "metadata": {},
439
  "output_type": "execute_result"
440
  }
 
16
  },
17
  {
18
  "cell_type": "code",
19
+ "execution_count": 1,
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
 
30
  },
31
  {
32
  "cell_type": "code",
33
+ "execution_count": 2,
34
  "metadata": {},
35
  "outputs": [
36
  {
 
92
  },
93
  {
94
  "cell_type": "code",
95
+ "execution_count": 3,
96
  "metadata": {},
97
  "outputs": [
98
  {
 
146
  },
147
  {
148
  "cell_type": "code",
149
+ "execution_count": 4,
150
  "metadata": {},
151
  "outputs": [
152
  {
 
169
  },
170
  {
171
  "cell_type": "code",
172
+ "execution_count": 5,
173
  "metadata": {},
174
  "outputs": [],
175
  "source": [
 
196
  },
197
  {
198
  "cell_type": "code",
199
+ "execution_count": 6,
200
  "metadata": {},
201
  "outputs": [],
202
  "source": [
 
214
  },
215
  {
216
  "cell_type": "code",
217
+ "execution_count": 7,
218
  "metadata": {},
219
  "outputs": [
220
  {
 
242
  },
243
  {
244
  "cell_type": "code",
245
+ "execution_count": 8,
246
  "metadata": {},
247
  "outputs": [
248
  {
 
303
  },
304
  {
305
  "cell_type": "code",
306
+ "execution_count": 9,
307
  "metadata": {},
308
  "outputs": [
309
  {
 
364
  },
365
  {
366
  "cell_type": "code",
367
+ "execution_count": 10,
368
  "metadata": {},
369
  "outputs": [
370
  {
 
425
  },
426
  {
427
  "cell_type": "code",
428
+ "execution_count": 11,
429
  "metadata": {},
430
  "outputs": [
431
  {
 
434
  "['df.joblib']"
435
  ]
436
  },
437
+ "execution_count": 11,
438
  "metadata": {},
439
  "output_type": "execute_result"
440
  }
notebook/3_Feature_Engineering.ipynb CHANGED
@@ -234,9 +234,9 @@
234
  "name": "stderr",
235
  "output_type": "stream",
236
  "text": [
237
- "/tmp/ipykernel_59311/3338083500.py:6: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
238
  " proba_depart_sat = df.groupby('tranche_sat_mean')['a_quitte_l_entreprise'].mean()\n",
239
- "/tmp/ipykernel_59311/3338083500.py:9: FutureWarning: \n",
240
  "\n",
241
  "Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.\n",
242
  "\n",
 
234
  "name": "stderr",
235
  "output_type": "stream",
236
  "text": [
237
+ "/tmp/ipykernel_256586/3338083500.py:6: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
238
  " proba_depart_sat = df.groupby('tranche_sat_mean')['a_quitte_l_entreprise'].mean()\n",
239
+ "/tmp/ipykernel_256586/3338083500.py:9: FutureWarning: \n",
240
  "\n",
241
  "Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.\n",
242
  "\n",
notebook/4_Modelisation.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -4,6 +4,7 @@ version = "0.1.0"
4
  description = ""
5
  authors = ["marintosti12 <marintosti12@gmail.com>"]
6
  readme = "README.md"
 
7
 
8
  [tool.poetry.dependencies]
9
  python = "^3.12"
 
4
  description = ""
5
  authors = ["marintosti12 <marintosti12@gmail.com>"]
6
  readme = "README.md"
7
+ package-mode = false
8
 
9
  [tool.poetry.dependencies]
10
  python = "^3.12"
src/controllers/predict_controller.py CHANGED
@@ -3,7 +3,6 @@ from fastapi import APIRouter, Depends, HTTPException, Body, status
3
  from config.db import get_db
4
  from models.ml import MLModel
5
 
6
- # Schemas
7
  from models.ml_inputs import MLInput
8
  from models.ml_output import MLOutput
9
 
 
3
  from config.db import get_db
4
  from models.ml import MLModel
5
 
 
6
  from models.ml_inputs import MLInput
7
  from models.ml_output import MLOutput
8