Spaces:

Bachstelze
/

github_sync

Sleeping

App Files Files Community

RasaBh committed 25 days ago

Commit

137c887

1 Parent(s): 98360d1

Improved regression task

Browse files

Files changed (3) hide show

A4/A4_Classification.ipynb +47 -13
A4/A4_Regression.ipynb +0 -0
A4/models/aimoscores_improved.pkl +3 -0

A4/A4_Classification.ipynb CHANGED Viewed

@@ -61,8 +61,7 @@
     ")\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.ensemble import (\n",
-    "    RandomForestClassifier,\n",
-    "    GradientBoostingClassifier\n",
     ")\n",
     "from imblearn.over_sampling import SMOTE\n",
     "import pickle\n",
@@ -543,7 +542,7 @@
     "champion_region_f1 = results_region_df.iloc[0]['F1_mean']\n",
     "\n",
     "print(f\"\\nChampion (Stage 1): {champion_region}\")\n",
-    "print(f\"F1-Score: {champion_region_f1:.4f} ± {results_region_df.iloc[0]['F1_std']:.4f}\")"
    ]
   },
   {
@@ -611,7 +610,7 @@
     "champion_14class_f1 = results_14class_df.iloc[0]['F1_mean']\n",
     "\n",
     "print(f\"\\nChampion (Single Model): {champion_14class}\")\n",
-    "print(f\"F1-Score: {champion_14class_f1:.4f} ± {results_14class_df.iloc[0]['F1_std']:.4f}\")"
    ]
   },
   {
@@ -735,15 +734,23 @@
     "print(\"Comparison of Pipeline vs single model\")\n",
     "\n",
     "print(f\"\\nSingle Model ({champion_14class}):\")\n",
-    "print(f\"F1-Score: {champion_14class_f1:.4f} ± {results_14class_df.iloc[0]['F1_std']:.4f}\")\n",
     "\n",
     "print(f\"\\nHierarchical Pipeline:\")\n",
-    "print(f\"F1-Score: {pipeline_f1_scores.mean():.4f} ± {pipeline_f1_scores.std():.4f}\")\n",
     "\n",
     "improvement = pipeline_f1_scores.mean() - champion_14class_f1\n",
     "print(f\"\\nDifference: {improvement:+.4f} ({100*improvement/champion_14class_f1:+.2f}%)\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 16,
@@ -763,7 +770,6 @@
     }
    ],
    "source": [
-    "# Statistical test\n",
     "def corrected_resampled_ttest(scores_a, scores_b, n_train, n_test):\n",
     "    k = len(scores_a)\n",
     "    differences = scores_a - scores_b\n",
@@ -791,6 +797,14 @@
     "    print(f\"Result: NOT statistically significant\")\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 17,
@@ -809,7 +823,6 @@
     }
    ],
    "source": [
-    "# Visualization\n",
     "fig, ax = plt.subplots(figsize=(8, 6))\n",
     "\n",
     "models = [f'Single Model\\n({champion_14class})', 'Hierarchical\\nPipeline']\n",
@@ -828,7 +841,7 @@
     "for i, (bar, mean, std) in enumerate(zip(bars, means, stds)):\n",
     "    height = bar.get_height()\n",
     "    ax.text(bar.get_x() + bar.get_width()/2., height + std + 0.02,\n",
-    "            f'{mean:.4f}±{std:.4f}',\n",
     "            ha='center', va='bottom', fontsize=11, fontweight='bold')\n",
     "\n",
     "plt.tight_layout()\n",
@@ -938,6 +951,14 @@
     "    }"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 19,
@@ -974,11 +995,18 @@
     }
    ],
    "source": [
-    "# Classification report\n",
     "print(\"\\nClassification Report:\")\n",
     "print(classification_report(y_test_14, y_pred_test, zero_division=0))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 20,
@@ -1007,7 +1035,6 @@
     }
    ],
    "source": [
-    "# Class imbalance analysis - Using Random forest\n",
     "\n",
     "# Train Random Forest\n",
     "rf_model = RandomForestClassifier(\n",
@@ -1046,6 +1073,14 @@
     "print(f\"Improvement vs LDA:    {improvement_vs_lda:+.2f}%\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 21,
@@ -1079,7 +1114,7 @@
     }
    ],
    "source": [
-    "# Final comparison\n",
     "\n",
     "# Collect all results\n",
     "results_all = [\n",
@@ -1181,7 +1216,6 @@
    "source": [
     "# Detailed analysis of the champion model\n",
     "\n",
-    "# Determine which model won\n",
     "if best_approach['Approach'] == 'Random Forest':\n",
     "    best_model = rf_model\n",
     "    best_predictions = y_pred_rf\n",

     ")\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.ensemble import (\n",
+    "    RandomForestClassifier\n",
     ")\n",
     "from imblearn.over_sampling import SMOTE\n",
     "import pickle\n",
     "champion_region_f1 = results_region_df.iloc[0]['F1_mean']\n",
     "\n",
     "print(f\"\\nChampion (Stage 1): {champion_region}\")\n",
+    "print(f\"F1-Score: {champion_region_f1:.4f} +/- {results_region_df.iloc[0]['F1_std']:.4f}\")"
    ]
   },
   {
     "champion_14class_f1 = results_14class_df.iloc[0]['F1_mean']\n",
     "\n",
     "print(f\"\\nChampion (Single Model): {champion_14class}\")\n",
+    "print(f\"F1-Score: {champion_14class_f1:.4f} +/- {results_14class_df.iloc[0]['F1_std']:.4f}\")"
    ]
   },
   {
     "print(\"Comparison of Pipeline vs single model\")\n",
     "\n",
     "print(f\"\\nSingle Model ({champion_14class}):\")\n",
+    "print(f\"F1-Score: {champion_14class_f1:.4f} +/- {results_14class_df.iloc[0]['F1_std']:.4f}\")\n",
     "\n",
     "print(f\"\\nHierarchical Pipeline:\")\n",
+    "print(f\"F1-Score: {pipeline_f1_scores.mean():.4f} +/- {pipeline_f1_scores.std():.4f}\")\n",
     "\n",
     "improvement = pipeline_f1_scores.mean() - champion_14class_f1\n",
     "print(f\"\\nDifference: {improvement:+.4f} ({100*improvement/champion_14class_f1:+.2f}%)\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "d284b4e6-8596-4806-847b-84db5f41d1dd",
+   "metadata": {},
+   "source": [
+    "### Statistical test"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 16,
     }
    ],
    "source": [
     "def corrected_resampled_ttest(scores_a, scores_b, n_train, n_test):\n",
     "    k = len(scores_a)\n",
     "    differences = scores_a - scores_b\n",
     "    print(f\"Result: NOT statistically significant\")\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "281ae294-b939-41af-bdda-030e77aefe08",
+   "metadata": {},
+   "source": [
+    "### Visualization"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 17,
     }
    ],
    "source": [
     "fig, ax = plt.subplots(figsize=(8, 6))\n",
     "\n",
     "models = [f'Single Model\\n({champion_14class})', 'Hierarchical\\nPipeline']\n",
     "for i, (bar, mean, std) in enumerate(zip(bars, means, stds)):\n",
     "    height = bar.get_height()\n",
     "    ax.text(bar.get_x() + bar.get_width()/2., height + std + 0.02,\n",
+    "            f'{mean:.4f}+/-{std:.4f}',\n",
     "            ha='center', va='bottom', fontsize=11, fontweight='bold')\n",
     "\n",
     "plt.tight_layout()\n",
     "    }"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "898490a6-59e2-4c42-bc98-943d9ac1c6c0",
+   "metadata": {},
+   "source": [
+    "### Classification report"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 19,
     }
    ],
    "source": [
     "print(\"\\nClassification Report:\")\n",
     "print(classification_report(y_test_14, y_pred_test, zero_division=0))"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "d8004001-9372-4c40-85e7-179f80ce6a7a",
+   "metadata": {},
+   "source": [
+    "### Class imbalance analysis - Using Random Forest"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 20,
     }
    ],
    "source": [
     "\n",
     "# Train Random Forest\n",
     "rf_model = RandomForestClassifier(\n",
     "print(f\"Improvement vs LDA:    {improvement_vs_lda:+.2f}%\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "0b16e409-7698-4f98-9c2e-20eec70213c4",
+   "metadata": {},
+   "source": [
+    "### Final comparison"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 21,
     }
    ],
    "source": [
+    "\n",
     "\n",
     "# Collect all results\n",
     "results_all = [\n",
    "source": [
     "# Detailed analysis of the champion model\n",
     "\n",
     "if best_approach['Approach'] == 'Random Forest':\n",
     "    best_model = rf_model\n",
     "    best_predictions = y_pred_rf\n",

A4/A4_Regression.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

A4/models/aimoscores_improved.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d63069b096d996ba193bb6fa6d1a73a01351617c122c16b9ed34f0c64c113df
+size 9630522