Spaces:
Running
Running
Wagtail committed on
Commit ·
4681753
1
Parent(s): bbdb075
Update A2_Report.ipynb
Browse files
- A2/A2_Report.ipynb +5 -5
A2/A2_Report.ipynb
CHANGED
|
@@ -51,7 +51,7 @@
|
|
| 51 |
"Dataset\n",
|
| 52 |
"Training samples: 1599\n",
|
| 53 |
"Test samples: 400\n",
|
| 54 |
-
"
|
| 55 |
"Total features: 40\n",
|
| 56 |
"\n",
|
| 57 |
"Feature types:\n",
|
|
@@ -76,14 +76,14 @@
|
|
| 76 |
"raw_train_df = pd.read_csv(os.path.join(DATA_DIR, \"A2_dataset_80.csv\"))\n",
|
| 77 |
"raw_test_df = pd.read_csv(os.path.join(DATA_DIR, \"A2_dataset_20.csv\"))\n",
|
| 78 |
"\n",
|
| 79 |
-
"#
|
| 80 |
"train_df = pd.read_csv(os.path.join(DATA_DIR, \"train.csv\"))\n",
|
| 81 |
"test_df = pd.read_csv(os.path.join(DATA_DIR, \"test.csv\"))\n",
|
| 82 |
"\n",
|
| 83 |
"print(\"Dataset\")\n",
|
| 84 |
"print(f\"Training samples: {len(train_df)}\")\n",
|
| 85 |
"print(f\"Test samples: {len(test_df)}\")\n",
|
| 86 |
-
"print(\"
|
| 87 |
"print(f\"Total features: {len(train_df.columns) - 2}\")\n",
|
| 88 |
"print(f\"\\nFeature types:\")\n",
|
| 89 |
"print(f\" - FMS Angle deviations (body joints): 13 features\")\n",
|
|
@@ -138,7 +138,7 @@
|
|
| 138 |
"plt.tight_layout()\n",
|
| 139 |
"plt.show()\n",
|
| 140 |
"\n",
|
| 141 |
-
"# plot the
|
| 142 |
"fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
|
| 143 |
"\n",
|
| 144 |
"# Target distribution\n",
|
|
@@ -148,7 +148,7 @@
|
|
| 148 |
"\n",
|
| 149 |
"# Box plot\n",
|
| 150 |
"sns.boxplot(x=train_df['AimoScore'], ax=axes[1], color='steelblue')\n",
|
| 151 |
-
"axes[1].set_title('
|
| 152 |
"\n",
|
| 153 |
"plt.tight_layout()\n",
|
| 154 |
"plt.show()"
|
|
|
|
| 51 |
"Dataset\n",
|
| 52 |
"Training samples: 1599\n",
|
| 53 |
"Test samples: 400\n",
|
| 54 |
+
"Deduplicated samples: 95\n",
|
| 55 |
"Total features: 40\n",
|
| 56 |
"\n",
|
| 57 |
"Feature types:\n",
|
|
|
|
| 76 |
"raw_train_df = pd.read_csv(os.path.join(DATA_DIR, \"A2_dataset_80.csv\"))\n",
|
| 77 |
"raw_test_df = pd.read_csv(os.path.join(DATA_DIR, \"A2_dataset_20.csv\"))\n",
|
| 78 |
"\n",
|
| 79 |
+
"# deduplicated dataset\n",
|
| 80 |
"train_df = pd.read_csv(os.path.join(DATA_DIR, \"train.csv\"))\n",
|
| 81 |
"test_df = pd.read_csv(os.path.join(DATA_DIR, \"test.csv\"))\n",
|
| 82 |
"\n",
|
| 83 |
"print(\"Dataset\")\n",
|
| 84 |
"print(f\"Training samples: {len(train_df)}\")\n",
|
| 85 |
"print(f\"Test samples: {len(test_df)}\")\n",
|
| 86 |
+
"print(\"Deduplicated samples: 95\")\n",
|
| 87 |
"print(f\"Total features: {len(train_df.columns) - 2}\")\n",
|
| 88 |
"print(f\"\\nFeature types:\")\n",
|
| 89 |
"print(f\" - FMS Angle deviations (body joints): 13 features\")\n",
|
|
|
|
| 138 |
"plt.tight_layout()\n",
|
| 139 |
"plt.show()\n",
|
| 140 |
"\n",
|
| 141 |
+
"# plot the deduplicated data\n",
|
| 142 |
"fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
|
| 143 |
"\n",
|
| 144 |
"# Target distribution\n",
|
|
|
|
| 148 |
"\n",
|
| 149 |
"# Box plot\n",
|
| 150 |
"sns.boxplot(x=train_df['AimoScore'], ax=axes[1], color='steelblue')\n",
|
| 151 |
+
"axes[1].set_title('Deduplicated AimoScore Boxplot')\n",
|
| 152 |
"\n",
|
| 153 |
"plt.tight_layout()\n",
|
| 154 |
"plt.show()"
|