computerscience-person committed on
Commit
2a796f1
·
1 Parent(s): b8863d9

Visualizations of clustering models.

Browse files

Signed-off-by: Oliver Ladores <oliver.ladores@wvsu.edu.ph>

Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -116,8 +116,6 @@ def _(dataset_encoded, mo):
116
  {classification_report(y_test, y_pred_dectree)}
117
  ```
118
 
119
- ## Conclusion
120
-
121
  {mo.callout("Classifiers don't work well with this dataset, let's try something else.", kind='info')}
122
  """)
123
  return (
@@ -172,12 +170,13 @@ def _(OrdinalEncoder, dataset, mo, pl):
172
 
173
  # df_kmeans_parts = pl.DataFrame(labels_kmeans, schema=pl.String)
174
  df_kmeans = X.with_columns(pl.lit(labels_kmeans, dtype=pl.String).alias('kmeans_cluster'))
 
175
 
176
  return mo.vstack([
177
  mo.md(f"""
178
  ## K-Means Clustering
179
 
180
- ### External Metrics
181
 
182
  Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_kmeans)}
183
 
@@ -200,7 +199,7 @@ def _(OrdinalEncoder, dataset, mo, pl):
200
 
201
  ## Spectral Clustering
202
 
203
- ### External Metrics
204
 
205
  Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_spec)}
206
 
@@ -222,12 +221,24 @@ def _(OrdinalEncoder, dataset, mo, pl):
222
 
223
  {mo.callout("Unsupervised clustering techniques do perform reasonably well, but does not correlate to other labels.", 'info')}
224
  """),
225
-
226
- alt.Chart(df_kmeans, autosize='pad').mark_circle().encode(
227
- x='Genetic_Mutation',
228
- y='Tumor_Size_mm',
229
- color='kmeans_cluster'
230
- )
 
 
 
 
 
 
 
 
 
 
 
 
231
  ])
232
 
233
 
 
116
  {classification_report(y_test, y_pred_dectree)}
117
  ```
118
 
 
 
119
  {mo.callout("Classifiers don't work well with this dataset, let's try something else.", kind='info')}
120
  """)
121
  return (
 
170
 
171
  # df_kmeans_parts = pl.DataFrame(labels_kmeans, schema=pl.String)
172
  df_kmeans = X.with_columns(pl.lit(labels_kmeans, dtype=pl.String).alias('kmeans_cluster'))
173
+ df_spec = X.with_columns(pl.lit(labels_spec, dtype=pl.String).alias('spectral_cluster'))
174
 
175
  return mo.vstack([
176
  mo.md(f"""
177
  ## K-Means Clustering
178
 
179
+ ### External Metrics (Based on Cancer Stage Labels)
180
 
181
  Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_kmeans)}
182
 
 
199
 
200
  ## Spectral Clustering
201
 
202
+ ### External Metrics (Based on Cancer Stage Labels)
203
 
204
  Adjusted Rand Index (ARI): {adjusted_rand_score(y, labels_spec)}
205
 
 
221
 
222
  {mo.callout("Unsupervised clustering techniques do perform reasonably well, but does not correlate to other labels.", 'info')}
223
  """),
224
+
225
+ mo.hstack([
226
+ alt.Chart(df_kmeans, autosize='pad').mark_rect().encode(
227
+ alt.X('Genetic_Mutation:N'),
228
+ y='Tumor_Size_mm',
229
+ color='kmeans_cluster'
230
+ ).properties(
231
+ width=325
232
+ ).interactive(),
233
+
234
+ alt.Chart(df_spec, autosize='pad').mark_rect().encode(
235
+ alt.X('Genetic_Mutation:N'),
236
+ y='Tumor_Size_mm',
237
+ color='spectral_cluster'
238
+ ).properties(
239
+ width=325
240
+ ).interactive(),
241
+ ])
242
  ])
243
 
244