Amol Kaushik committed on
Commit
4bce895
·
1 Parent(s): f15dcea

google drive trial

Browse files
Files changed (3) hide show
  1. A3/A3_Report.ipynb +50 -31
  2. app.py +6 -16
  3. requirements.txt +1 -1
A3/A3_Report.ipynb CHANGED
@@ -47,7 +47,7 @@
47
  },
48
  {
49
  "cell_type": "code",
50
- "execution_count": 61,
51
  "id": "edbe3fbd",
52
  "metadata": {},
53
  "outputs": [],
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "cell_type": "code",
80
- "execution_count": 62,
81
  "id": "23f1b38b",
82
  "metadata": {},
83
  "outputs": [
@@ -128,7 +128,7 @@
128
  },
129
  {
130
  "cell_type": "code",
131
- "execution_count": 63,
132
  "id": "080ab472",
133
  "metadata": {},
134
  "outputs": [
@@ -200,7 +200,7 @@
200
  },
201
  {
202
  "cell_type": "code",
203
- "execution_count": 64,
204
  "id": "438e27ae",
205
  "metadata": {},
206
  "outputs": [
@@ -298,7 +298,7 @@
298
  },
299
  {
300
  "cell_type": "code",
301
- "execution_count": 65,
302
  "id": "7560ae66",
303
  "metadata": {},
304
  "outputs": [
@@ -335,7 +335,7 @@
335
  },
336
  {
337
  "cell_type": "code",
338
- "execution_count": 66,
339
  "id": "9f17a88e",
340
  "metadata": {},
341
  "outputs": [
@@ -381,7 +381,7 @@
381
  },
382
  {
383
  "cell_type": "code",
384
- "execution_count": 67,
385
  "id": "d4c02996",
386
  "metadata": {},
387
  "outputs": [],
@@ -404,7 +404,7 @@
404
  },
405
  {
406
  "cell_type": "code",
407
- "execution_count": 68,
408
  "id": "c8292b2b",
409
  "metadata": {},
410
  "outputs": [],
@@ -442,7 +442,7 @@
442
  },
443
  {
444
  "cell_type": "code",
445
- "execution_count": 69,
446
  "id": "b598aef7",
447
  "metadata": {},
448
  "outputs": [
@@ -475,7 +475,7 @@
475
  },
476
  {
477
  "cell_type": "code",
478
- "execution_count": 70,
479
  "id": "962743cc",
480
  "metadata": {},
481
  "outputs": [
@@ -603,7 +603,7 @@
603
  },
604
  {
605
  "cell_type": "code",
606
- "execution_count": 71,
607
  "id": "5c9efd5b",
608
  "metadata": {},
609
  "outputs": [
@@ -636,7 +636,7 @@
636
  },
637
  {
638
  "cell_type": "code",
639
- "execution_count": 72,
640
  "id": "ce01a75f",
641
  "metadata": {},
642
  "outputs": [
@@ -814,7 +814,7 @@
814
  },
815
  {
816
  "cell_type": "code",
817
- "execution_count": 73,
818
  "id": "3e5e5e9b",
819
  "metadata": {},
820
  "outputs": [
@@ -849,7 +849,7 @@
849
  },
850
  {
851
  "cell_type": "code",
852
- "execution_count": 74,
853
  "id": "4de69063",
854
  "metadata": {},
855
  "outputs": [
@@ -885,7 +885,7 @@
885
  },
886
  {
887
  "cell_type": "code",
888
- "execution_count": 75,
889
  "id": "a994b1af",
890
  "metadata": {},
891
  "outputs": [
@@ -1035,7 +1035,7 @@
1035
  },
1036
  {
1037
  "cell_type": "code",
1038
- "execution_count": 76,
1039
  "id": "00f3eda4",
1040
  "metadata": {},
1041
  "outputs": [
@@ -1067,7 +1067,7 @@
1067
  },
1068
  {
1069
  "cell_type": "code",
1070
- "execution_count": 77,
1071
  "id": "6b03902f",
1072
  "metadata": {},
1073
  "outputs": [
@@ -1262,7 +1262,7 @@
1262
  },
1263
  {
1264
  "cell_type": "code",
1265
- "execution_count": 78,
1266
  "id": "0b3e066a",
1267
  "metadata": {},
1268
  "outputs": [
@@ -1398,7 +1398,7 @@
1398
  },
1399
  {
1400
  "cell_type": "code",
1401
- "execution_count": 79,
1402
  "id": "d21c037d",
1403
  "metadata": {},
1404
  "outputs": [
@@ -1476,7 +1476,7 @@
1476
  },
1477
  {
1478
  "cell_type": "code",
1479
- "execution_count": 80,
1480
  "id": "4f01e27a",
1481
  "metadata": {},
1482
  "outputs": [
@@ -1484,24 +1484,43 @@
1484
  "name": "stdout",
1485
  "output_type": "stream",
1486
  "text": [
1487
- "Champion model dictionary saved: final_champion_model_A3.pkl\n",
1488
- "\n",
1489
- "Note: The deployable Pipeline model is saved separately as models/classification_champion.pkl\n"
 
 
1490
  ]
1491
  }
1492
  ],
1493
  "source": [
1494
- "# Save the final champion model\n",
1495
- "# Note: For deployment, we create a sklearn Pipeline artifact that includes\n",
1496
- "# both StandardScaler and the classifier, saved as models/classification_champion.pkl\n",
1497
- "# This is done via create_model_artifact.py (utility script)\n",
 
 
 
1498
  "\n",
1499
- "model_filename = 'final_champion_model_A3.pkl'\n",
1500
- "model_dictionary = {\"model\": final_model}\n",
 
 
 
 
 
 
 
 
 
 
1501
  "with open(model_filename, 'wb') as f:\n",
1502
  " pickle.dump(model_dictionary, f)\n",
1503
- "print(f\"Champion model dictionary saved: {model_filename}\")\n",
1504
- "print(\"\\nNote: The deployable Pipeline model is saved separately as models/classification_champion.pkl\")"
 
 
 
 
1505
  ]
1506
  },
1507
  {
 
47
  },
48
  {
49
  "cell_type": "code",
50
+ "execution_count": 121,
51
  "id": "edbe3fbd",
52
  "metadata": {},
53
  "outputs": [],
 
77
  },
78
  {
79
  "cell_type": "code",
80
+ "execution_count": 122,
81
  "id": "23f1b38b",
82
  "metadata": {},
83
  "outputs": [
 
128
  },
129
  {
130
  "cell_type": "code",
131
+ "execution_count": 123,
132
  "id": "080ab472",
133
  "metadata": {},
134
  "outputs": [
 
200
  },
201
  {
202
  "cell_type": "code",
203
+ "execution_count": 124,
204
  "id": "438e27ae",
205
  "metadata": {},
206
  "outputs": [
 
298
  },
299
  {
300
  "cell_type": "code",
301
+ "execution_count": 125,
302
  "id": "7560ae66",
303
  "metadata": {},
304
  "outputs": [
 
335
  },
336
  {
337
  "cell_type": "code",
338
+ "execution_count": 126,
339
  "id": "9f17a88e",
340
  "metadata": {},
341
  "outputs": [
 
381
  },
382
  {
383
  "cell_type": "code",
384
+ "execution_count": 127,
385
  "id": "d4c02996",
386
  "metadata": {},
387
  "outputs": [],
 
404
  },
405
  {
406
  "cell_type": "code",
407
+ "execution_count": 128,
408
  "id": "c8292b2b",
409
  "metadata": {},
410
  "outputs": [],
 
442
  },
443
  {
444
  "cell_type": "code",
445
+ "execution_count": 129,
446
  "id": "b598aef7",
447
  "metadata": {},
448
  "outputs": [
 
475
  },
476
  {
477
  "cell_type": "code",
478
+ "execution_count": 130,
479
  "id": "962743cc",
480
  "metadata": {},
481
  "outputs": [
 
603
  },
604
  {
605
  "cell_type": "code",
606
+ "execution_count": 131,
607
  "id": "5c9efd5b",
608
  "metadata": {},
609
  "outputs": [
 
636
  },
637
  {
638
  "cell_type": "code",
639
+ "execution_count": 132,
640
  "id": "ce01a75f",
641
  "metadata": {},
642
  "outputs": [
 
814
  },
815
  {
816
  "cell_type": "code",
817
+ "execution_count": 133,
818
  "id": "3e5e5e9b",
819
  "metadata": {},
820
  "outputs": [
 
849
  },
850
  {
851
  "cell_type": "code",
852
+ "execution_count": 134,
853
  "id": "4de69063",
854
  "metadata": {},
855
  "outputs": [
 
885
  },
886
  {
887
  "cell_type": "code",
888
+ "execution_count": 135,
889
  "id": "a994b1af",
890
  "metadata": {},
891
  "outputs": [
 
1035
  },
1036
  {
1037
  "cell_type": "code",
1038
+ "execution_count": 136,
1039
  "id": "00f3eda4",
1040
  "metadata": {},
1041
  "outputs": [
 
1067
  },
1068
  {
1069
  "cell_type": "code",
1070
+ "execution_count": 137,
1071
  "id": "6b03902f",
1072
  "metadata": {},
1073
  "outputs": [
 
1262
  },
1263
  {
1264
  "cell_type": "code",
1265
+ "execution_count": 138,
1266
  "id": "0b3e066a",
1267
  "metadata": {},
1268
  "outputs": [
 
1398
  },
1399
  {
1400
  "cell_type": "code",
1401
+ "execution_count": 139,
1402
  "id": "d21c037d",
1403
  "metadata": {},
1404
  "outputs": [
 
1476
  },
1477
  {
1478
  "cell_type": "code",
1479
+ "execution_count": 142,
1480
  "id": "4f01e27a",
1481
  "metadata": {},
1482
  "outputs": [
 
1484
  "name": "stdout",
1485
  "output_type": "stream",
1486
  "text": [
1487
+ "Champion model saved: models/final_champion_model_A3.pkl\n",
1488
+ "Keys in artifact: ['model', 'feature_columns', 'classes', 'test_metrics']\n",
1489
+ "Number of features: 41\n",
1490
+ "Classes: ['Lower Body', 'Upper Body']\n",
1491
+ "Test F1: 0.8278, Accuracy: 0.8377\n"
1492
  ]
1493
  }
1494
  ],
1495
  "source": [
1496
+ "# Save the final champion model with complete artifact for deployment\n",
1497
+ "import os\n",
1498
+ "os.makedirs('models', exist_ok=True)\n",
1499
+ "\n",
1500
+ "# Compute test metrics\n",
1501
+ "test_f1 = f1_score(y_test_final, y_pred_final, average='weighted', zero_division=0)\n",
1502
+ "test_acc = accuracy_score(y_test_final, y_pred_final)\n",
1503
  "\n",
1504
+ "# Create complete model artifact with all required fields for app.py\n",
1505
+ "model_dictionary = {\n",
1506
+ " 'model': final_model,\n",
1507
+ " 'feature_columns': features_region,\n",
1508
+ " 'classes': sorted(y_region.unique()),\n",
1509
+ " 'test_metrics': {\n",
1510
+ " 'f1_weighted': test_f1,\n",
1511
+ " 'accuracy': test_acc\n",
1512
+ " }\n",
1513
+ "}\n",
1514
+ "\n",
1515
+ "model_filename = 'models/final_champion_model_A3.pkl'\n",
1516
  "with open(model_filename, 'wb') as f:\n",
1517
  " pickle.dump(model_dictionary, f)\n",
1518
+ "\n",
1519
+ "print(f'Champion model saved: {model_filename}')\n",
1520
+ "print(f'Keys in artifact: {list(model_dictionary.keys())}')\n",
1521
+ "print(f'Number of features: {len(features_region)}')\n",
1522
+ "print(f\"Classes: {model_dictionary['classes']}\")\n",
1523
+ "print(f'Test F1: {test_f1:.4f}, Accuracy: {test_acc:.4f}')"
1524
  ]
1525
  },
1526
  {
app.py CHANGED
@@ -2,14 +2,14 @@ import gradio as gr
2
  import pandas as pd
3
  import pickle
4
  import os
5
- import requests
6
 
7
  # Get directory where this script is located
8
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
9
 
10
  # Google Drive file IDs for model downloads
11
- MODEL_GDRIVE_ID = "1ORlU0OOCBkWXVO2UFAkXaKtXfkOH7w1t" # champion_model_final_2.pkl
12
- CLASSIFICATION_MODEL_GDRIVE_ID = "1QYVd9sHZbI4Vp21bO2Zd1vTcRpcq9wJs" # final_champion_model_A3.pkl
13
 
14
  # Local paths - models loaded from A3/models/ directory
15
  MODEL_PATH = os.path.join(SCRIPT_DIR, "A3/models/champion_model_final_2.pkl")
@@ -18,20 +18,10 @@ DATA_PATH = os.path.join(SCRIPT_DIR, "A3/A3_Data/train_dataset.csv")
18
 
19
 
20
  def download_from_gdrive(file_id, destination):
21
- """Download a file from Google Drive."""
22
- URL = "https://drive.google.com/uc?export=download"
23
-
24
- session = requests.Session()
25
- response = session.get(URL, params={'id': file_id, 'confirm': 't'}, stream=True)
26
-
27
- # Create directory if needed
28
  os.makedirs(os.path.dirname(destination), exist_ok=True)
29
-
30
- with open(destination, "wb") as f:
31
- for chunk in response.iter_content(chunk_size=32768):
32
- if chunk:
33
- f.write(chunk)
34
-
35
  print(f"Downloaded to {destination}")
36
  return True
37
 
 
2
  import pandas as pd
3
  import pickle
4
  import os
5
+ import gdown
6
 
7
  # Get directory where this script is located
8
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
9
 
10
  # Google Drive file IDs for model downloads
11
+ MODEL_GDRIVE_ID = "1ORlU0OOCBkWXVO2UFAkXaKtXfkOH7w1t"
12
+ CLASSIFICATION_MODEL_GDRIVE_ID = "1qU6Q37CoToMxzBwori5V3_bonBIIb-K0"
13
 
14
  # Local paths - models loaded from A3/models/ directory
15
  MODEL_PATH = os.path.join(SCRIPT_DIR, "A3/models/champion_model_final_2.pkl")
 
18
 
19
 
20
  def download_from_gdrive(file_id, destination):
21
+ """Download a file from Google Drive using gdown."""
 
 
 
 
 
 
22
  os.makedirs(os.path.dirname(destination), exist_ok=True)
23
+ url = f"https://drive.google.com/uc?id={file_id}"
24
+ gdown.download(url, destination, quiet=False)
 
 
 
 
25
  print(f"Downloaded to {destination}")
26
  return True
27
 
requirements.txt CHANGED
@@ -4,4 +4,4 @@ numpy>=1.24.0
4
  scikit-learn==1.7.2
5
  statsmodels>=0.14.0
6
  matplotlib>=3.7.0
7
- requests>=2.28.0
 
4
  scikit-learn==1.7.2
5
  statsmodels>=0.14.0
6
  matplotlib>=3.7.0
7
+ gdown>=4.7.0