Bachstelze committed on
Commit
e0f3135
·
1 Parent(s): 39b920c

redo previous commit and add dictionary pickle

Browse files
Files changed (1) hide show
  1. A3/A3_Report.ipynb +29 -38
A3/A3_Report.ipynb CHANGED
@@ -47,7 +47,7 @@
47
  },
48
  {
49
  "cell_type": "code",
50
- "execution_count": 264,
51
  "id": "edbe3fbd",
52
  "metadata": {},
53
  "outputs": [],
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "cell_type": "code",
80
- "execution_count": 265,
81
  "id": "23f1b38b",
82
  "metadata": {},
83
  "outputs": [
@@ -128,7 +128,7 @@
128
  },
129
  {
130
  "cell_type": "code",
131
- "execution_count": 266,
132
  "id": "080ab472",
133
  "metadata": {},
134
  "outputs": [
@@ -200,7 +200,7 @@
200
  },
201
  {
202
  "cell_type": "code",
203
- "execution_count": 267,
204
  "id": "438e27ae",
205
  "metadata": {},
206
  "outputs": [
@@ -298,7 +298,7 @@
298
  },
299
  {
300
  "cell_type": "code",
301
- "execution_count": 268,
302
  "id": "7560ae66",
303
  "metadata": {},
304
  "outputs": [
@@ -335,7 +335,7 @@
335
  },
336
  {
337
  "cell_type": "code",
338
- "execution_count": 269,
339
  "id": "9f17a88e",
340
  "metadata": {},
341
  "outputs": [
@@ -381,7 +381,7 @@
381
  },
382
  {
383
  "cell_type": "code",
384
- "execution_count": 270,
385
  "id": "d4c02996",
386
  "metadata": {},
387
  "outputs": [],
@@ -404,7 +404,7 @@
404
  },
405
  {
406
  "cell_type": "code",
407
- "execution_count": 271,
408
  "id": "c8292b2b",
409
  "metadata": {},
410
  "outputs": [],
@@ -442,7 +442,7 @@
442
  },
443
  {
444
  "cell_type": "code",
445
- "execution_count": 272,
446
  "id": "b598aef7",
447
  "metadata": {},
448
  "outputs": [
@@ -475,7 +475,7 @@
475
  },
476
  {
477
  "cell_type": "code",
478
- "execution_count": 273,
479
  "id": "962743cc",
480
  "metadata": {},
481
  "outputs": [
@@ -603,7 +603,7 @@
603
  },
604
  {
605
  "cell_type": "code",
606
- "execution_count": 274,
607
  "id": "5c9efd5b",
608
  "metadata": {},
609
  "outputs": [
@@ -636,7 +636,7 @@
636
  },
637
  {
638
  "cell_type": "code",
639
- "execution_count": 275,
640
  "id": "ce01a75f",
641
  "metadata": {},
642
  "outputs": [
@@ -814,7 +814,7 @@
814
  },
815
  {
816
  "cell_type": "code",
817
- "execution_count": 276,
818
  "id": "3e5e5e9b",
819
  "metadata": {},
820
  "outputs": [
@@ -849,7 +849,7 @@
849
  },
850
  {
851
  "cell_type": "code",
852
- "execution_count": 277,
853
  "id": "4de69063",
854
  "metadata": {},
855
  "outputs": [
@@ -885,7 +885,7 @@
885
  },
886
  {
887
  "cell_type": "code",
888
- "execution_count": 278,
889
  "id": "a994b1af",
890
  "metadata": {},
891
  "outputs": [
@@ -1035,7 +1035,7 @@
1035
  },
1036
  {
1037
  "cell_type": "code",
1038
- "execution_count": 279,
1039
  "id": "00f3eda4",
1040
  "metadata": {},
1041
  "outputs": [
@@ -1067,7 +1067,7 @@
1067
  },
1068
  {
1069
  "cell_type": "code",
1070
- "execution_count": 280,
1071
  "id": "6b03902f",
1072
  "metadata": {},
1073
  "outputs": [
@@ -1249,13 +1249,7 @@
1249
  "source": [
1250
  "## 8. Why we did not use polynomial features\n",
1251
  "\n",
1252
- "We tested polynomial interaction features (degree=2) which created 820 new features from the original 40. However, this approach was not used in the final model because:\n",
1253
- "\n",
1254
- "1. **Minimal improvement**: The F1-score improvement was negligible\n",
1255
- "2. **Increased complexity**: 820 features vs 40 original features makes the model harder to interpret and deploy\n",
1256
- "3. **Risk of overfitting**: Many more parameters to learn from the same amount of data\n",
1257
- "\n",
1258
- "The tuned body region model without polynomial features provides a good balance of accuracy and simplicity."
1259
  ]
1260
  },
1261
  {
@@ -1268,7 +1262,7 @@
1268
  },
1269
  {
1270
  "cell_type": "code",
1271
- "execution_count": 281,
1272
  "id": "0b3e066a",
1273
  "metadata": {},
1274
  "outputs": [
@@ -1404,7 +1398,7 @@
1404
  },
1405
  {
1406
  "cell_type": "code",
1407
- "execution_count": 282,
1408
  "id": "d21c037d",
1409
  "metadata": {},
1410
  "outputs": [
@@ -1482,7 +1476,7 @@
1482
  },
1483
  {
1484
  "cell_type": "code",
1485
- "execution_count": 283,
1486
  "id": "4f01e27a",
1487
  "metadata": {},
1488
  "outputs": [
@@ -1503,9 +1497,10 @@
1503
  "# This is done via create_model_artifact.py (utility script)\n",
1504
  "\n",
1505
  "model_filename = 'final_champion_model_A3.pkl'\n",
 
1506
  "with open(model_filename, 'wb') as f:\n",
1507
- " pickle.dump(final_model, f)\n",
1508
- "print(f\"Champion model saved: {model_filename}\")\n",
1509
  "print(\"\\nNote: The deployable Pipeline model is saved separately as models/classification_champion.pkl\")"
1510
  ]
1511
  },
@@ -1516,7 +1511,7 @@
1516
  "source": [
1517
  "## 10. Deployment\n",
1518
  "\n",
1519
- "The classification endpoint is added to the existing Gradio app as a second tab. Tab 1 has Movement Scoring from A2. Tab 2 has Body Region Classification which takes 40 deviation features as input and outputs the predicted body region. The model is an sklearn Pipeline of StandardScaler + LogisticRegression getting 76.4% accuracy and 77.4% F1-weighted.\n",
1520
  "\n",
1521
  "Deployment URL: https://huggingface.co/spaces/Bachstelze/github_sync"
1522
  ]
@@ -1547,7 +1542,7 @@
1547
  },
1548
  {
1549
  "cell_type": "code",
1550
- "execution_count": 284,
1551
  "id": "9c52b59b",
1552
  "metadata": {},
1553
  "outputs": [
@@ -1637,17 +1632,13 @@
1637
  "| 3 | Baseline | Body Regions | Grouped classes (Upper/Lower) |\n",
1638
  "| 4 | Tuned | Body Regions | GridSearchCV (5-fold CV) |\n",
1639
  "\n",
1640
- "Note: Polynomial interaction features were tested but not included in final iterations due to minimal improvement and increased complexity (820 features vs 40).\n",
1641
- "\n",
1642
- "### Deployed model\n",
1643
- "\n",
1644
- "The deployed model uses body region classification with an sklearn Pipeline of StandardScaler + LogisticRegression with balanced class weights. It takes 40 input features and gets 76.4% accuracy and 77.4% F1-weighted on test."
1645
  ]
1646
  }
1647
  ],
1648
  "metadata": {
1649
  "kernelspec": {
1650
- "display_name": "Python 3",
1651
  "language": "python",
1652
  "name": "python3"
1653
  },
@@ -1661,7 +1652,7 @@
1661
  "name": "python",
1662
  "nbconvert_exporter": "python",
1663
  "pygments_lexer": "ipython3",
1664
- "version": "3.14.0"
1665
  }
1666
  },
1667
  "nbformat": 4,
 
47
  },
48
  {
49
  "cell_type": "code",
50
+ "execution_count": 306,
51
  "id": "edbe3fbd",
52
  "metadata": {},
53
  "outputs": [],
 
77
  },
78
  {
79
  "cell_type": "code",
80
+ "execution_count": 307,
81
  "id": "23f1b38b",
82
  "metadata": {},
83
  "outputs": [
 
128
  },
129
  {
130
  "cell_type": "code",
131
+ "execution_count": 308,
132
  "id": "080ab472",
133
  "metadata": {},
134
  "outputs": [
 
200
  },
201
  {
202
  "cell_type": "code",
203
+ "execution_count": 309,
204
  "id": "438e27ae",
205
  "metadata": {},
206
  "outputs": [
 
298
  },
299
  {
300
  "cell_type": "code",
301
+ "execution_count": 310,
302
  "id": "7560ae66",
303
  "metadata": {},
304
  "outputs": [
 
335
  },
336
  {
337
  "cell_type": "code",
338
+ "execution_count": 311,
339
  "id": "9f17a88e",
340
  "metadata": {},
341
  "outputs": [
 
381
  },
382
  {
383
  "cell_type": "code",
384
+ "execution_count": 312,
385
  "id": "d4c02996",
386
  "metadata": {},
387
  "outputs": [],
 
404
  },
405
  {
406
  "cell_type": "code",
407
+ "execution_count": 313,
408
  "id": "c8292b2b",
409
  "metadata": {},
410
  "outputs": [],
 
442
  },
443
  {
444
  "cell_type": "code",
445
+ "execution_count": 314,
446
  "id": "b598aef7",
447
  "metadata": {},
448
  "outputs": [
 
475
  },
476
  {
477
  "cell_type": "code",
478
+ "execution_count": 315,
479
  "id": "962743cc",
480
  "metadata": {},
481
  "outputs": [
 
603
  },
604
  {
605
  "cell_type": "code",
606
+ "execution_count": 316,
607
  "id": "5c9efd5b",
608
  "metadata": {},
609
  "outputs": [
 
636
  },
637
  {
638
  "cell_type": "code",
639
+ "execution_count": 317,
640
  "id": "ce01a75f",
641
  "metadata": {},
642
  "outputs": [
 
814
  },
815
  {
816
  "cell_type": "code",
817
+ "execution_count": 318,
818
  "id": "3e5e5e9b",
819
  "metadata": {},
820
  "outputs": [
 
849
  },
850
  {
851
  "cell_type": "code",
852
+ "execution_count": 319,
853
  "id": "4de69063",
854
  "metadata": {},
855
  "outputs": [
 
885
  },
886
  {
887
  "cell_type": "code",
888
+ "execution_count": 320,
889
  "id": "a994b1af",
890
  "metadata": {},
891
  "outputs": [
 
1035
  },
1036
  {
1037
  "cell_type": "code",
1038
+ "execution_count": 321,
1039
  "id": "00f3eda4",
1040
  "metadata": {},
1041
  "outputs": [
 
1067
  },
1068
  {
1069
  "cell_type": "code",
1070
+ "execution_count": 322,
1071
  "id": "6b03902f",
1072
  "metadata": {},
1073
  "outputs": [
 
1249
  "source": [
1250
  "## 8. Why we did not use polynomial features\n",
1251
  "\n",
1252
+ "We tested polynomial interaction features, which created 820 new features from the original 40. However, this approach was not used in the final model because the F1-score improvement was negligible and 820 features (vs. the 40 original features) make the model hard to interpret. It also means many more parameters to learn from the same amount of data, which increases the risk of overfitting. The tuned body region model without polynomial features therefore provides a good balance of accuracy and simplicity."
 
 
 
 
 
 
1253
  ]
1254
  },
1255
  {
 
1262
  },
1263
  {
1264
  "cell_type": "code",
1265
+ "execution_count": 323,
1266
  "id": "0b3e066a",
1267
  "metadata": {},
1268
  "outputs": [
 
1398
  },
1399
  {
1400
  "cell_type": "code",
1401
+ "execution_count": 324,
1402
  "id": "d21c037d",
1403
  "metadata": {},
1404
  "outputs": [
 
1476
  },
1477
  {
1478
  "cell_type": "code",
1479
+ "execution_count": 325,
1480
  "id": "4f01e27a",
1481
  "metadata": {},
1482
  "outputs": [
 
1497
  "# This is done via create_model_artifact.py (utility script)\n",
1498
  "\n",
1499
  "model_filename = 'final_champion_model_A3.pkl'\n",
1500
+ "model_dictionary = {\"model\": final_model}\n",
1501
  "with open(model_filename, 'wb') as f:\n",
1502
+ " pickle.dump(model_dictionary, f)\n",
1503
+ "print(f\"Champion model dictionary saved: {model_filename}\")\n",
1504
  "print(\"\\nNote: The deployable Pipeline model is saved separately as models/classification_champion.pkl\")"
1505
  ]
1506
  },
 
1511
  "source": [
1512
  "## 10. Deployment\n",
1513
  "\n",
1514
+ "The classification endpoint is added to the existing Gradio app as a second tab. Tab 1 has Movement Scoring from A2. Tab 2 has Body Region Classification which takes 40 deviation features as input and outputs the predicted body region.\n",
1515
  "\n",
1516
  "Deployment URL: https://huggingface.co/spaces/Bachstelze/github_sync"
1517
  ]
 
1542
  },
1543
  {
1544
  "cell_type": "code",
1545
+ "execution_count": 326,
1546
  "id": "9c52b59b",
1547
  "metadata": {},
1548
  "outputs": [
 
1632
  "| 3 | Baseline | Body Regions | Grouped classes (Upper/Lower) |\n",
1633
  "| 4 | Tuned | Body Regions | GridSearchCV (5-fold CV) |\n",
1634
  "\n",
1635
+ "Note: Polynomial interaction features were tested but not included in final iterations due to minimal improvement and increased complexity (820 features vs 40)."
 
 
 
 
1636
  ]
1637
  }
1638
  ],
1639
  "metadata": {
1640
  "kernelspec": {
1641
+ "display_name": "Python 3 (ipykernel)",
1642
  "language": "python",
1643
  "name": "python3"
1644
  },
 
1652
  "name": "python",
1653
  "nbconvert_exporter": "python",
1654
  "pygments_lexer": "ipython3",
1655
+ "version": "3.12.3"
1656
  }
1657
  },
1658
  "nbformat": 4,