Commit ·
2a796f1
1
Parent(s): b8863d9
Visualizations of clustering models.
Browse files
Signed-off-by: Oliver Ladores <oliver.ladores@wvsu.edu.ph>
app.py
CHANGED
|
@@ -116,8 +116,6 @@ def _(dataset_encoded, mo):
|
|
| 116 |
{classification_report(y_test, y_pred_dectree)}
|
| 117 |
```
|
| 118 |
|
| 119 |
-
## Conclusion
|
| 120 |
-
|
| 121 |
{mo.callout("Classifiers don't work well with this dataset, let's try something else.", kind='info')}
|
| 122 |
""")
|
| 123 |
return (
|
|
@@ -172,12 +170,13 @@ def _(OrdinalEncoder, dataset, mo, pl):
|
|
| 172 |
|
| 173 |
# df_kmeans_parts = pl.DataFrame(labels_kmeans, schema=pl.String)
|
| 174 |
df_kmeans = X.with_columns(pl.lit(labels_kmeans, dtype=pl.String).alias('kmeans_cluster'))
|
|
|
|
| 175 |
|
| 176 |
return mo.vstack([
|
| 177 |
mo.md(f"""
|
| 178 |
## K-Means Clustering
|
| 179 |
|
| 180 |
-
### External Metrics
|
| 181 |
|
| 182 |
Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_kmeans)}
|
| 183 |
|
|
@@ -200,7 +199,7 @@ def _(OrdinalEncoder, dataset, mo, pl):
|
|
| 200 |
|
| 201 |
## Spectral Clustering
|
| 202 |
|
| 203 |
-
### External Metrics
|
| 204 |
|
| 205 |
Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_spec)}
|
| 206 |
|
|
@@ -222,12 +221,24 @@ def _(OrdinalEncoder, dataset, mo, pl):
|
|
| 222 |
|
| 223 |
{mo.callout("Unsupervised clustering techniques do perform reasonably well, but does not correlate to other labels.", 'info')}
|
| 224 |
"""),
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
])
|
| 232 |
|
| 233 |
|
|
|
|
| 116 |
{classification_report(y_test, y_pred_dectree)}
|
| 117 |
```
|
| 118 |
|
|
|
|
|
|
|
| 119 |
{mo.callout("Classifiers don't work well with this dataset, let's try something else.", kind='info')}
|
| 120 |
""")
|
| 121 |
return (
|
|
|
|
| 170 |
|
| 171 |
# df_kmeans_parts = pl.DataFrame(labels_kmeans, schema=pl.String)
|
| 172 |
df_kmeans = X.with_columns(pl.lit(labels_kmeans, dtype=pl.String).alias('kmeans_cluster'))
|
| 173 |
+
df_spec = X.with_columns(pl.lit(labels_spec, dtype=pl.String).alias('spectral_cluster'))
|
| 174 |
|
| 175 |
return mo.vstack([
|
| 176 |
mo.md(f"""
|
| 177 |
## K-Means Clustering
|
| 178 |
|
| 179 |
+
### External Metrics (Based on Cancer Stage Labels)
|
| 180 |
|
| 181 |
Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_kmeans)}
|
| 182 |
|
|
|
|
| 199 |
|
| 200 |
## Spectral Clustering
|
| 201 |
|
| 202 |
+
### External Metrics (Based on Cancer Stage Labels)
|
| 203 |
|
| 204 |
Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_spec)}
|
| 205 |
|
|
|
|
| 221 |
|
| 222 |
{mo.callout("Unsupervised clustering techniques do perform reasonably well, but does not correlate to other labels.", 'info')}
|
| 223 |
"""),
|
| 224 |
+
|
| 225 |
+
mo.hstack([
|
| 226 |
+
alt.Chart(df_kmeans, autosize='pad').mark_rect().encode(
|
| 227 |
+
alt.X('Genetic_Mutation:N'),
|
| 228 |
+
y='Tumor_Size_mm',
|
| 229 |
+
color='kmeans_cluster'
|
| 230 |
+
).properties(
|
| 231 |
+
width=325
|
| 232 |
+
).interactive(),
|
| 233 |
+
|
| 234 |
+
alt.Chart(df_spec, autosize='pad').mark_rect().encode(
|
| 235 |
+
alt.X('Genetic_Mutation:N'),
|
| 236 |
+
y='Tumor_Size_mm',
|
| 237 |
+
color='spectral_cluster'
|
| 238 |
+
).properties(
|
| 239 |
+
width=325
|
| 240 |
+
).interactive(),
|
| 241 |
+
])
|
| 242 |
])
|
| 243 |
|
| 244 |
|