Spaces:

Bachstelze
/

github_sync

Sleeping

App Files Files Community

Reem committed 13 days ago

Commit

5437e78

1 Parent(s): 3e20c68

safe-dep-test?

Browse files

Files changed (5) hide show

A4/test_models.py +9 -5
A5/final_report_A5.ipynb +1 -1
Untitled.ipynb +107 -0
app.py +2 -2
requirements.txt +2 -0

A4/test_models.py CHANGED Viewed

@@ -79,9 +79,10 @@ class TestClassificationModelLoading:
     def test_classification_artifact_has_feature_columns(self, classification_artifact):
         assert "feature_columns" in classification_artifact
-    def test_classification_artifact_has_classes(self, classification_artifact):
-        # weaklink categories for the 14 classes
-        assert "weaklink_categories" in classification_artifact
     def test_classification_model_has_predict_method(self, classification_artifact):
         model = classification_artifact["model"]
@@ -90,8 +91,10 @@ class TestClassificationModelLoading:
     def test_classification_classes_match_expected(
         self, classification_artifact, expected_classification_classes
     ):
-        classes = list(classification_artifact["weaklink_categories"])
         assert sorted(classes) == sorted(expected_classification_classes)
 class TestClassificationModelPrediction:
@@ -138,8 +141,9 @@ class TestModelArtifactStructure:
     def test_regression_artifact_has_metrics(self, regression_artifact):
         assert "test_metrics" in regression_artifact
     def test_classification_artifact_has_metrics(self, classification_artifact):
-        assert "test_performance" in classification_artifact
     def test_regression_metrics_has_r2(self, regression_artifact):
         metrics = regression_artifact.get("test_metrics", {})

     def test_classification_artifact_has_feature_columns(self, classification_artifact):
         assert "feature_columns" in classification_artifact
+    def test_classification_model_has_classes(self, classification_artifact):
+        model = classification_artifact["model"]
+        assert hasattr(model, "classes_")
     def test_classification_model_has_predict_method(self, classification_artifact):
         model = classification_artifact["model"]
     def test_classification_classes_match_expected(
         self, classification_artifact, expected_classification_classes
     ):
+        classes = list(classification_artifact["model"].classes_)
         assert sorted(classes) == sorted(expected_classification_classes)
 class TestClassificationModelPrediction:
     def test_regression_artifact_has_metrics(self, regression_artifact):
         assert "test_metrics" in regression_artifact
     def test_classification_artifact_has_metrics(self, classification_artifact):
+        assert "test_metrics" in classification_artifact
     def test_regression_metrics_has_r2(self, regression_artifact):
         metrics = regression_artifact.get("test_metrics", {})

A5/final_report_A5.ipynb CHANGED Viewed

@@ -252,7 +252,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.3"
   }
  },
  "nbformat": 4,

    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.12.8"
   }
  },
  "nbformat": 4,

Untitled.ipynb ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "6de1a7e2-d47b-4de4-a006-882bc270d9d3",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'xgboost'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mModuleNotFoundError\u001b[39m                       Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mA5\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mCorrelationFilter\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m CorrelationFilter\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpickle\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mxgboost\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mxgb\u001b[39;00m\n\u001b[32m      4\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mlightgbm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mlgb\u001b[39;00m\n\u001b[32m      5\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mA5/models/ensemble_classification_champion.pkl\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mrb\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n",
+      "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'xgboost'"
+     ]
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "dff0f7ab-3b6e-4299-a61e-bb49a895167f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting xgboost\n",
+      "  Downloading xgboost-3.2.0-py3-none-macosx_12_0_arm64.whl.metadata (2.1 kB)\n",
+      "Collecting lightgbm\n",
+      "  Downloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl.metadata (17 kB)\n",
+      "Requirement already satisfied: numpy in /Users/reemothman/miniconda3/lib/python3.12/site-packages (from xgboost) (1.26.4)\n",
+      "Requirement already satisfied: scipy in /Users/reemothman/miniconda3/lib/python3.12/site-packages (from xgboost) (1.17.0)\n",
+      "Downloading xgboost-3.2.0-py3-none-macosx_12_0_arm64.whl (2.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading lightgbm-4.6.0-py3-none-macosx_12_0_arm64.whl (1.6 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hInstalling collected packages: xgboost, lightgbm\n",
+      "Successfully installed lightgbm-4.6.0 xgboost-3.2.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install xgboost lightgbm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "d93e2239-29aa-434e-a82f-cc00cde6064a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dict_keys(['model', 'model_name', 'scaler', 'feature_columns', 'cv_metrics', 'test_metrics', 'a4_champion_f1', 'improvement_pct'])\n"
+     ]
+    }
+   ],
+   "source": [
+    "from A5.CorrelationFilter import CorrelationFilter\n",
+    "import pickle\n",
+    "import xgboost as xgb\n",
+    "import lightgbm as lgb\n",
+    "with open(\"A5/models/ensemble_classification_champion.pkl\", \"rb\") as f:\n",
+    "    artifact = pickle.load(f)\n",
+    "\n",
+    "print(artifact.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b96efe6a-86f0-4e45-94ed-98ff1da1b8e9",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

app.py CHANGED Viewed

@@ -15,8 +15,8 @@ MODEL_PATH = os.path.join(
 )
 CLASSIFICATION_MODEL_PATH = os.path.join(
     SCRIPT_DIR,
-    "A4/models/weaklink_classifier_rf.pkl",
-    #  new classifier without "classes" key "A5/models/weaklink_classifier_rfc_A4.pkl"
 )
 DATA_PATH = os.path.join(
     SCRIPT_DIR,

 )
 CLASSIFICATION_MODEL_PATH = os.path.join(
     SCRIPT_DIR,
+    "A5b/models/ensemble_classification_champion.pkl",
+    # NOTE: this artifact has no "classes" key; class labels are read from the model's classes_ attribute
 )
 DATA_PATH = os.path.join(
     SCRIPT_DIR,

requirements.txt CHANGED Viewed

@@ -5,6 +5,8 @@ scikit-learn==1.8.0
 statsmodels==0.14.4
 matplotlib==3.9.2
 gdown==5.2.0
 pytest==8.3.4
 pytest-cov==6.0.0

 statsmodels==0.14.4
 matplotlib==3.9.2
 gdown==5.2.0
+xgboost==3.2.0
+lightgbm==4.6.0
 pytest==8.3.4
 pytest-cov==6.0.0