Amol Kaushik committed on
Commit
4bce895
·
1 Parent(s): f15dcea

google drive trial

Browse files
Files changed (3) hide show
  1. A3/A3_Report.ipynb +50 -31
  2. app.py +6 -16
  3. requirements.txt +1 -1
A3/A3_Report.ipynb CHANGED
@@ -47,7 +47,7 @@
47
  },
48
  {
49
  "cell_type": "code",
50
- "execution_count": 61,
51
  "id": "edbe3fbd",
52
  "metadata": {},
53
  "outputs": [],
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "cell_type": "code",
80
- "execution_count": 62,
81
  "id": "23f1b38b",
82
  "metadata": {},
83
  "outputs": [
@@ -128,7 +128,7 @@
128
  },
129
  {
130
  "cell_type": "code",
131
- "execution_count": 63,
132
  "id": "080ab472",
133
  "metadata": {},
134
  "outputs": [
@@ -200,7 +200,7 @@
200
  },
201
  {
202
  "cell_type": "code",
203
- "execution_count": 64,
204
  "id": "438e27ae",
205
  "metadata": {},
206
  "outputs": [
@@ -298,7 +298,7 @@
298
  },
299
  {
300
  "cell_type": "code",
301
- "execution_count": 65,
302
  "id": "7560ae66",
303
  "metadata": {},
304
  "outputs": [
@@ -335,7 +335,7 @@
335
  },
336
  {
337
  "cell_type": "code",
338
- "execution_count": 66,
339
  "id": "9f17a88e",
340
  "metadata": {},
341
  "outputs": [
@@ -381,7 +381,7 @@
381
  },
382
  {
383
  "cell_type": "code",
384
- "execution_count": 67,
385
  "id": "d4c02996",
386
  "metadata": {},
387
  "outputs": [],
@@ -404,7 +404,7 @@
404
  },
405
  {
406
  "cell_type": "code",
407
- "execution_count": 68,
408
  "id": "c8292b2b",
409
  "metadata": {},
410
  "outputs": [],
@@ -442,7 +442,7 @@
442
  },
443
  {
444
  "cell_type": "code",
445
- "execution_count": 69,
446
  "id": "b598aef7",
447
  "metadata": {},
448
  "outputs": [
@@ -475,7 +475,7 @@
475
  },
476
  {
477
  "cell_type": "code",
478
- "execution_count": 70,
479
  "id": "962743cc",
480
  "metadata": {},
481
  "outputs": [
@@ -603,7 +603,7 @@
603
  },
604
  {
605
  "cell_type": "code",
606
- "execution_count": 71,
607
  "id": "5c9efd5b",
608
  "metadata": {},
609
  "outputs": [
@@ -636,7 +636,7 @@
636
  },
637
  {
638
  "cell_type": "code",
639
- "execution_count": 72,
640
  "id": "ce01a75f",
641
  "metadata": {},
642
  "outputs": [
@@ -814,7 +814,7 @@
814
  },
815
  {
816
  "cell_type": "code",
817
- "execution_count": 73,
818
  "id": "3e5e5e9b",
819
  "metadata": {},
820
  "outputs": [
@@ -849,7 +849,7 @@
849
  },
850
  {
851
  "cell_type": "code",
852
- "execution_count": 74,
853
  "id": "4de69063",
854
  "metadata": {},
855
  "outputs": [
@@ -885,7 +885,7 @@
885
  },
886
  {
887
  "cell_type": "code",
888
- "execution_count": 75,
889
  "id": "a994b1af",
890
  "metadata": {},
891
  "outputs": [
@@ -1035,7 +1035,7 @@
1035
  },
1036
  {
1037
  "cell_type": "code",
1038
- "execution_count": 76,
1039
  "id": "00f3eda4",
1040
  "metadata": {},
1041
  "outputs": [
@@ -1067,7 +1067,7 @@
1067
  },
1068
  {
1069
  "cell_type": "code",
1070
- "execution_count": 77,
1071
  "id": "6b03902f",
1072
  "metadata": {},
1073
  "outputs": [
@@ -1262,7 +1262,7 @@
1262
  },
1263
  {
1264
  "cell_type": "code",
1265
- "execution_count": 78,
1266
  "id": "0b3e066a",
1267
  "metadata": {},
1268
  "outputs": [
@@ -1398,7 +1398,7 @@
1398
  },
1399
  {
1400
  "cell_type": "code",
1401
- "execution_count": 79,
1402
  "id": "d21c037d",
1403
  "metadata": {},
1404
  "outputs": [
@@ -1476,7 +1476,7 @@
1476
  },
1477
  {
1478
  "cell_type": "code",
1479
- "execution_count": 80,
1480
  "id": "4f01e27a",
1481
  "metadata": {},
1482
  "outputs": [
@@ -1484,24 +1484,43 @@
1484
  "name": "stdout",
1485
  "output_type": "stream",
1486
  "text": [
1487
- "Champion model dictionary saved: final_champion_model_A3.pkl\n",
1488
- "\n",
1489
- "Note: The deployable Pipeline model is saved separately as models/classification_champion.pkl\n"
 
 
1490
  ]
1491
  }
1492
  ],
1493
  "source": [
1494
- "# Save the final champion model\n",
1495
- "# Note: For deployment, we create a sklearn Pipeline artifact that includes\n",
1496
- "# both StandardScaler and the classifier, saved as models/classification_champion.pkl\n",
1497
- "# This is done via create_model_artifact.py (utility script)\n",
 
 
 
1498
  "\n",
1499
- "model_filename = 'final_champion_model_A3.pkl'\n",
1500
- "model_dictionary = {\"model\": final_model}\n",
 
 
 
 
 
 
 
 
 
 
1501
  "with open(model_filename, 'wb') as f:\n",
1502
  " pickle.dump(model_dictionary, f)\n",
1503
- "print(f\"Champion model dictionary saved: {model_filename}\")\n",
1504
- "print(\"\\nNote: The deployable Pipeline model is saved separately as models/classification_champion.pkl\")"
 
 
 
 
1505
  ]
1506
  },
1507
  {
 
47
  },
48
  {
49
  "cell_type": "code",
50
+ "execution_count": 121,
51
  "id": "edbe3fbd",
52
  "metadata": {},
53
  "outputs": [],
 
77
  },
78
  {
79
  "cell_type": "code",
80
+ "execution_count": 122,
81
  "id": "23f1b38b",
82
  "metadata": {},
83
  "outputs": [
 
128
  },
129
  {
130
  "cell_type": "code",
131
+ "execution_count": 123,
132
  "id": "080ab472",
133
  "metadata": {},
134
  "outputs": [
 
200
  },
201
  {
202
  "cell_type": "code",
203
+ "execution_count": 124,
204
  "id": "438e27ae",
205
  "metadata": {},
206
  "outputs": [
 
298
  },
299
  {
300
  "cell_type": "code",
301
+ "execution_count": 125,
302
  "id": "7560ae66",
303
  "metadata": {},
304
  "outputs": [
 
335
  },
336
  {
337
  "cell_type": "code",
338
+ "execution_count": 126,
339
  "id": "9f17a88e",
340
  "metadata": {},
341
  "outputs": [
 
381
  },
382
  {
383
  "cell_type": "code",
384
+ "execution_count": 127,
385
  "id": "d4c02996",
386
  "metadata": {},
387
  "outputs": [],
 
404
  },
405
  {
406
  "cell_type": "code",
407
+ "execution_count": 128,
408
  "id": "c8292b2b",
409
  "metadata": {},
410
  "outputs": [],
 
442
  },
443
  {
444
  "cell_type": "code",
445
+ "execution_count": 129,
446
  "id": "b598aef7",
447
  "metadata": {},
448
  "outputs": [
 
475
  },
476
  {
477
  "cell_type": "code",
478
+ "execution_count": 130,
479
  "id": "962743cc",
480
  "metadata": {},
481
  "outputs": [
 
603
  },
604
  {
605
  "cell_type": "code",
606
+ "execution_count": 131,
607
  "id": "5c9efd5b",
608
  "metadata": {},
609
  "outputs": [
 
636
  },
637
  {
638
  "cell_type": "code",
639
+ "execution_count": 132,
640
  "id": "ce01a75f",
641
  "metadata": {},
642
  "outputs": [
 
814
  },
815
  {
816
  "cell_type": "code",
817
+ "execution_count": 133,
818
  "id": "3e5e5e9b",
819
  "metadata": {},
820
  "outputs": [
 
849
  },
850
  {
851
  "cell_type": "code",
852
+ "execution_count": 134,
853
  "id": "4de69063",
854
  "metadata": {},
855
  "outputs": [
 
885
  },
886
  {
887
  "cell_type": "code",
888
+ "execution_count": 135,
889
  "id": "a994b1af",
890
  "metadata": {},
891
  "outputs": [
 
1035
  },
1036
  {
1037
  "cell_type": "code",
1038
+ "execution_count": 136,
1039
  "id": "00f3eda4",
1040
  "metadata": {},
1041
  "outputs": [
 
1067
  },
1068
  {
1069
  "cell_type": "code",
1070
+ "execution_count": 137,
1071
  "id": "6b03902f",
1072
  "metadata": {},
1073
  "outputs": [
 
1262
  },
1263
  {
1264
  "cell_type": "code",
1265
+ "execution_count": 138,
1266
  "id": "0b3e066a",
1267
  "metadata": {},
1268
  "outputs": [
 
1398
  },
1399
  {
1400
  "cell_type": "code",
1401
+ "execution_count": 139,
1402
  "id": "d21c037d",
1403
  "metadata": {},
1404
  "outputs": [
 
1476
  },
1477
  {
1478
  "cell_type": "code",
1479
+ "execution_count": 142,
1480
  "id": "4f01e27a",
1481
  "metadata": {},
1482
  "outputs": [
 
1484
  "name": "stdout",
1485
  "output_type": "stream",
1486
  "text": [
1487
+ "Champion model saved: models/final_champion_model_A3.pkl\n",
1488
+ "Keys in artifact: ['model', 'feature_columns', 'classes', 'test_metrics']\n",
1489
+ "Number of features: 41\n",
1490
+ "Classes: ['Lower Body', 'Upper Body']\n",
1491
+ "Test F1: 0.8278, Accuracy: 0.8377\n"
1492
  ]
1493
  }
1494
  ],
1495
  "source": [
1496
+ "# Save the final champion model with complete artifact for deployment\n",
1497
+ "import os\n",
1498
+ "os.makedirs('models', exist_ok=True)\n",
1499
+ "\n",
1500
+ "# Compute test metrics\n",
1501
+ "test_f1 = f1_score(y_test_final, y_pred_final, average='weighted', zero_division=0)\n",
1502
+ "test_acc = accuracy_score(y_test_final, y_pred_final)\n",
1503
  "\n",
1504
+ "# Create complete model artifact with all required fields for app.py\n",
1505
+ "model_dictionary = {\n",
1506
+ " 'model': final_model,\n",
1507
+ " 'feature_columns': features_region,\n",
1508
+ " 'classes': sorted(y_region.unique()),\n",
1509
+ " 'test_metrics': {\n",
1510
+ " 'f1_weighted': test_f1,\n",
1511
+ " 'accuracy': test_acc\n",
1512
+ " }\n",
1513
+ "}\n",
1514
+ "\n",
1515
+ "model_filename = 'models/final_champion_model_A3.pkl'\n",
1516
  "with open(model_filename, 'wb') as f:\n",
1517
  " pickle.dump(model_dictionary, f)\n",
1518
+ "\n",
1519
+ "print(f'Champion model saved: {model_filename}')\n",
1520
+ "print(f'Keys in artifact: {list(model_dictionary.keys())}')\n",
1521
+ "print(f'Number of features: {len(features_region)}')\n",
1522
+ "print(f\"Classes: {model_dictionary['classes']}\")\n",
1523
+ "print(f'Test F1: {test_f1:.4f}, Accuracy: {test_acc:.4f}')"
1524
  ]
1525
  },
1526
  {
app.py CHANGED
@@ -2,14 +2,14 @@ import gradio as gr
2
  import pandas as pd
3
  import pickle
4
  import os
5
- import requests
6
 
7
  # Get directory where this script is located
8
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
9
 
10
  # Google Drive file IDs for model downloads
11
- MODEL_GDRIVE_ID = "1ORlU0OOCBkWXVO2UFAkXaKtXfkOH7w1t" # champion_model_final_2.pkl
12
- CLASSIFICATION_MODEL_GDRIVE_ID = "1QYVd9sHZbI4Vp21bO2Zd1vTcRpcq9wJs" # final_champion_model_A3.pkl
13
 
14
  # Local paths - models loaded from A3/models/ directory
15
  MODEL_PATH = os.path.join(SCRIPT_DIR, "A3/models/champion_model_final_2.pkl")
@@ -18,20 +18,10 @@ DATA_PATH = os.path.join(SCRIPT_DIR, "A3/A3_Data/train_dataset.csv")
18
 
19
 
20
  def download_from_gdrive(file_id, destination):
21
- """Download a file from Google Drive."""
22
- URL = "https://drive.google.com/uc?export=download"
23
-
24
- session = requests.Session()
25
- response = session.get(URL, params={'id': file_id, 'confirm': 't'}, stream=True)
26
-
27
- # Create directory if needed
28
  os.makedirs(os.path.dirname(destination), exist_ok=True)
29
-
30
- with open(destination, "wb") as f:
31
- for chunk in response.iter_content(chunk_size=32768):
32
- if chunk:
33
- f.write(chunk)
34
-
35
  print(f"Downloaded to {destination}")
36
  return True
37
 
 
2
  import pandas as pd
3
  import pickle
4
  import os
5
+ import gdown
6
 
7
  # Get directory where this script is located
8
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
9
 
10
  # Google Drive file IDs for model downloads
11
+ MODEL_GDRIVE_ID = "1ORlU0OOCBkWXVO2UFAkXaKtXfkOH7w1t"
12
+ CLASSIFICATION_MODEL_GDRIVE_ID = "1qU6Q37CoToMxzBwori5V3_bonBIIb-K0"
13
 
14
  # Local paths - models loaded from A3/models/ directory
15
  MODEL_PATH = os.path.join(SCRIPT_DIR, "A3/models/champion_model_final_2.pkl")
 
18
 
19
 
20
  def download_from_gdrive(file_id, destination):
21
+ """Download a file from Google Drive using gdown."""
 
 
 
 
 
 
22
  os.makedirs(os.path.dirname(destination), exist_ok=True)
23
+ url = f"https://drive.google.com/uc?id={file_id}"
24
+ gdown.download(url, destination, quiet=False)
 
 
 
 
25
  print(f"Downloaded to {destination}")
26
  return True
27
 
requirements.txt CHANGED
@@ -4,4 +4,4 @@ numpy>=1.24.0
4
  scikit-learn==1.7.2
5
  statsmodels>=0.14.0
6
  matplotlib>=3.7.0
7
- requests>=2.28.0
 
4
  scikit-learn==1.7.2
5
  statsmodels>=0.14.0
6
  matplotlib>=3.7.0
7
+ gdown>=4.7.0