Hafnium49 committed on
Commit
0c41843
·
verified ·
1 Parent(s): c02b8ff

Deploy weighted clustering presets with 11 physics perspectives

Browse files
app.py CHANGED
@@ -16,7 +16,6 @@ import argparse
16
  import json
17
  import os
18
  import sys
19
- from functools import lru_cache
20
  from pathlib import Path
21
 
22
  from dotenv import load_dotenv
@@ -56,8 +55,8 @@ DASH_I18N = {
56
  ),
57
  "c1_title": "Band gap distribution over material families",
58
  "c2_title": "Compare band gap distribution of material types",
59
- "c3_title": "Principal Component Analysis (PCA) for high-dimensional material embeddings",
60
- "c3_desc": "Reduces 32-dimensional material embeddings to 2D/3D to reveal clustering patterns.",
61
  "c4_title": "Look up materials by chemical family",
62
  "c5_title": "Band gap by material family and material type",
63
  "c6_title": "Show the top N material families",
@@ -101,9 +100,8 @@ DASH_I18N = {
101
  "chart_type_avg_cluster": "{type} — Avg Band Gap by Cluster",
102
  "chart_pca_2d": "PCA 2D Projection",
103
  "chart_pca_3d": "PCA 3D Projection",
104
- "lbl_weighted_pc1": "Weighted PC1 (Physics-Adaptive)",
105
- "lbl_weighted_pc2": "Weighted PC2 (Physics-Adaptive)",
106
- "lbl_weighted_pc3": "Weighted PC3 (Physics-Adaptive)",
107
  "chart_variance": "Explained Variance (%)",
108
  "chart_component": "Component",
109
  "chart_variance_pct": "Variance (%)",
@@ -159,8 +157,8 @@ DASH_I18N = {
159
  ),
160
  "c1_title": "材料ファミリーごとのバンドギャップ分布",
161
  "c2_title": "材料タイプ別バンドギャップ分布の比較",
162
- "c3_title": "高次元材料埋め込みの主成分分析(PCA)",
163
- "c3_desc": "32次元の材料埋め込みを2D/3Dに次元削減し、クラスタリングパタンを可視化。",
164
  "c4_title": "化学ファミリーで材料を検索",
165
  "c5_title": "材料ファミリーと材料タイプ別バンドギャップ",
166
  "c6_title": "上位N材料ファミリーを表示",
@@ -204,9 +202,8 @@ DASH_I18N = {
204
  "chart_type_avg_cluster": "{type} — クラスター別平均バンドギャップ",
205
  "chart_pca_2d": "PCA 2D射影",
206
  "chart_pca_3d": "PCA 3D射影",
207
- "lbl_weighted_pc1": "重み付きPC1(物理適応軸)",
208
- "lbl_weighted_pc2": "重み付きPC2(物理適応軸)",
209
- "lbl_weighted_pc3": "重み付きPC3(物理適応軸)",
210
  "chart_variance": "寄与率(%)",
211
  "chart_component": "成分",
212
  "chart_variance_pct": "寄与率(%)",
@@ -292,35 +289,7 @@ df = pd.read_csv(CSV)
292
  DIM_COLS = [f"dim_{i}" for i in range(32)]
293
  ALL_TYPES = ["Metallic", "Semiconductor", "Insulator"]
294
 
295
- # Load raw embedding vectors for runtime weighted PCA
296
- print("Loading raw embedding vectors for weighted PCA...")
297
- VECTORS_PATH = CACHE / "vectors.npy"
298
- ESEN_PATH = CACHE / "esen_vectors.npy"
299
-
300
- if VECTORS_PATH.exists() and ESEN_PATH.exists():
301
- vectors = np.load(VECTORS_PATH).astype(np.float32) # (33973, 2738)
302
- esen = np.load(ESEN_PATH).astype(np.float32) # (33973, 128)
303
-
304
- # Split into embedding spaces
305
- RAW_EMBEDDINGS = {
306
- "orb": vectors[:, :1792], # (33973, 1792)
307
- "mm": vectors[:, 1792:2550], # (33973, 758)
308
- "ofm": vectors[:, 2550:2738], # (33973, 188)
309
- "esen": esen, # (33973, 128)
310
- }
311
-
312
- mem_mb = sum(v.nbytes for v in RAW_EMBEDDINGS.values()) / (1024 * 1024)
313
- print(f" Loaded raw vectors: {mem_mb:.1f} MB in memory")
314
- print(f" orb: {RAW_EMBEDDINGS['orb'].shape}")
315
- print(f" mm: {RAW_EMBEDDINGS['mm'].shape}")
316
- print(f" ofm: {RAW_EMBEDDINGS['ofm'].shape}")
317
- print(f" esen: {RAW_EMBEDDINGS['esen'].shape}")
318
- else:
319
- print(" WARNING: Raw vectors not found. Runtime PCA disabled.")
320
- RAW_EMBEDDINGS = None
321
-
322
  # Compute PCA on balanced 32D UMAP for variance bar chart
323
- # Chart 3 scatter will use runtime weighted PCA instead
324
  X = StandardScaler().fit_transform(df[DIM_COLS].values)
325
  pca_full = PCA(n_components=20, random_state=42).fit(X)
326
  pc_all = pca_full.transform(X) # (33973, 20)
@@ -418,76 +387,20 @@ if S_ORB is not None and S_MM is not None and S_OFM is not None and S_ESEN is no
418
  else:
419
  print(" WARNING: Centroid similarity matrices not loaded. Custom mode unavailable.")
420
 
421
- # ── Weighted PCA computation ───────────────────────────────────────────
422
-
423
- def compute_weighted_pca(weights, n_dims=2):
424
- """
425
- Compute PCA on weighted concatenated vectors.
426
-
427
- This is the mathematically correct approach: weight-concatenate the raw
428
- embedding vectors, then compute PCA on the weighted space. Guarantees
429
- that PC0, PC1 capture maximum variance in the weighted combination.
430
-
431
- Args:
432
- weights: dict with keys 'orb', 'mm', 'ofm', 'esen' (normalized to sum=1)
433
- n_dims: int, 2 for 2D scatter, 3 for 3D scatter
434
-
435
- Returns:
436
- np.ndarray of shape (33973, n_dims) - PCA coordinates
437
-
438
- Performance: ~500ms (StandardScaler + PCA on 33,973 × 2,866 matrix)
439
- """
440
- if RAW_EMBEDDINGS is None:
441
- # Fallback: return zeros if raw vectors not loaded
442
- print("WARNING: RAW_EMBEDDINGS not available, returning zeros")
443
- return np.zeros((len(df), n_dims))
444
-
445
- # Weight-concatenate raw embedding vectors
446
- # Scale by sqrt(dim) to give equal importance per dimension, not per space
447
- weighted_vectors = np.concatenate([
448
- weights["orb"] * RAW_EMBEDDINGS["orb"] / np.sqrt(1792),
449
- weights["mm"] * RAW_EMBEDDINGS["mm"] / np.sqrt(758),
450
- weights["ofm"] * RAW_EMBEDDINGS["ofm"] / np.sqrt(188),
451
- weights["esen"] * RAW_EMBEDDINGS["esen"] / np.sqrt(128),
452
- ], axis=1) # Shape: (33973, 2866)
453
-
454
- # Standardize and compute PCA
455
- # StandardScaler ensures each dimension has mean=0, std=1
456
- scaler = StandardScaler()
457
- scaled = scaler.fit_transform(weighted_vectors)
458
-
459
- # PCA to extract top n_dims components
460
- pca = PCA(n_components=n_dims, random_state=42)
461
- pca_coords = pca.fit_transform(scaled) # Shape: (33973, n_dims)
462
-
463
- # Log explained variance for debugging
464
- explained_var = pca.explained_variance_ratio_.sum() * 100
465
- print(f" Weighted PCA: {explained_var:.1f}% variance explained (n_dims={n_dims})")
466
-
467
- return pca_coords
468
-
469
-
470
- @lru_cache(maxsize=128)
471
- def compute_weighted_pca_cached(weights_tuple, n_dims):
472
- """
473
- Cached wrapper for compute_weighted_pca.
474
-
475
- Args:
476
- weights_tuple: tuple (w_orb, w_mm, w_ofm, w_esen) - must be hashable
477
- n_dims: int
478
 
479
- Returns:
480
- np.ndarray of shape (33973, n_dims)
481
- """
482
- # Convert tuple back to dict
483
- weights = {
484
- "orb": weights_tuple[0],
485
- "mm": weights_tuple[1],
486
- "ofm": weights_tuple[2],
487
- "esen": weights_tuple[3],
488
- }
489
 
490
- return compute_weighted_pca(weights, n_dims)
491
 
492
  # ── Label resolution ───────────────────────────────────────────────────
493
 
@@ -700,10 +613,6 @@ app.layout = html.Div(
700
  dcc.Store(id="active-labels",
701
  data={"mode": "preset", "key": "balanced"}),
702
 
703
- # ── Debouncing for weight sliders ──────────────────────────────
704
- dcc.Store(id="weight-buffer", data=None),
705
- dcc.Interval(id="debounce-interval", interval=300, max_intervals=0),
706
-
707
  # ── Clustering control panel ────────────────────────────────────
708
  html.Div(style={**CARD, "borderLeft": "4px solid #1976d2"}, children=[
709
  html.H3(T["lbl_cluster_mode"],
@@ -769,11 +678,21 @@ app.layout = html.Div(
769
  html.Div(style={"display": "flex", "gap": "16px",
770
  "flexWrap": "wrap", "marginBottom": "12px"}, children=[
771
  html.Div([
772
- html.Div(T["lbl_components"], style=LABEL),
773
- dcc.Dropdown(id="c3-ndim", options=["2D", "3D"],
774
- value="2D", clearable=False,
775
- style={"width": "100px"}),
776
- ]),
 
 
 
 
 
 
 
 
 
 
777
  html.Div([
778
  html.Div(T["lbl_color_by"], style=LABEL),
779
  dcc.Dropdown(id="c3-color", options=color_by_options,
@@ -806,20 +725,12 @@ app.layout = html.Div(
806
  ]),
807
  html.Div(style={"display": "flex", "gap": "16px"}, children=[
808
  html.Div(
809
- dcc.Loading(
810
- id="loading-c3-scatter",
811
- type="circle",
812
- children=dcc.Graph(id="c3-scatter")
813
- ),
814
  id="c3-scatter-container",
815
  style={"flex": "1", "minWidth": 0}
816
  ),
817
  html.Div(
818
- dcc.Loading(
819
- id="loading-c3-variance",
820
- type="circle",
821
- children=dcc.Graph(id="c3-variance")
822
- ),
823
  id="c3-variance-container",
824
  style={"flex": "1", "minWidth": 0, "display": "none"}
825
  ),
@@ -1059,50 +970,6 @@ app.layout = html.Div(
1059
  # CALLBACKS
1060
  # ══════════════════════════════════════════════════════════════════════════
1061
 
1062
- # ── Buffer weight changes (instant, no PCA) ──────────────────────────
1063
- @callback(
1064
- Output("weight-buffer", "data"),
1065
- Output("debounce-interval", "max_intervals"),
1066
- Input("cw-orb", "value"),
1067
- Input("cw-mm", "value"),
1068
- Input("cw-ofm", "value"),
1069
- Input("cw-esen", "value"),
1070
- State("debounce-interval", "n_intervals"),
1071
- )
1072
- def buffer_weights(w_orb, w_mm, w_ofm, w_esen, n_intervals):
1073
- """Store weight changes immediately without triggering expensive PCA."""
1074
- import time
1075
- weights = [w_orb or 0.25, w_mm or 0.25, w_ofm or 0.25, w_esen or 0.25]
1076
- timestamp = time.time()
1077
-
1078
- # Reset interval counter to restart debounce timer
1079
- # max_intervals=1 means interval will fire once after 300ms
1080
- return {"weights": weights, "timestamp": timestamp}, 1
1081
-
1082
-
1083
- # ── Apply debounced weights (after 300ms of no changes) ─────────────
1084
- @callback(
1085
- Output("active-labels", "data", allow_duplicate=True),
1086
- Input("debounce-interval", "n_intervals"),
1087
- State("weight-buffer", "data"),
1088
- State("cluster-mode", "value"),
1089
- prevent_initial_call=True,
1090
- )
1091
- def apply_debounced_weights(n_intervals, buffer, mode):
1092
- """Apply weights after debounce delay (300ms of no slider changes)."""
1093
- if mode != "custom" or not buffer:
1094
- from dash.exceptions import PreventUpdate
1095
- raise PreventUpdate
1096
-
1097
- w = buffer["weights"]
1098
- total = sum(w)
1099
- if total > 0:
1100
- w = [x / total for x in w]
1101
- else:
1102
- w = [0.25, 0.25, 0.25, 0.25]
1103
-
1104
- return {"mode": "custom", "weights": w}
1105
-
1106
 
1107
  # ── Active labels computation (preset mode or initial) ────────────────
1108
  @callback(
@@ -1216,40 +1083,22 @@ def chart3(ndim, color_by, filter_type, topn_str, active_data):
1216
  clusters, families, displays = resolve_labels(active_data)
1217
  work = df.assign(Cluster=clusters, Family=families, FamilyDisplay=displays)
1218
 
1219
- # ===== Compute weighted PCA coordinates =====
1220
- # Extract weights from active_data
1221
- if active_data and active_data.get("mode") == "preset":
1222
- preset_key = active_data.get("key", "balanced")
1223
- weights_tuple = PRESETS.get(preset_key, (0.25, 0.25, 0.25, 0.25))
1224
- elif active_data and active_data.get("mode") == "custom":
1225
- weights_tuple = tuple(active_data.get("weights", [0.25, 0.25, 0.25, 0.25]))
1226
- else:
1227
- # Fallback to balanced
1228
- weights_tuple = (0.25, 0.25, 0.25, 0.25)
1229
-
1230
- # Normalize weights
1231
- w_sum = sum(weights_tuple)
1232
- if w_sum > 0:
1233
- weights = {
1234
- "orb": weights_tuple[0] / w_sum,
1235
- "mm": weights_tuple[1] / w_sum,
1236
- "ofm": weights_tuple[2] / w_sum,
1237
- "esen": weights_tuple[3] / w_sum,
1238
- }
1239
- else:
1240
- weights = {"orb": 0.25, "mm": 0.25, "ofm": 0.25, "esen": 0.25}
1241
 
1242
- # Compute weighted PCA coordinates (cached for performance)
1243
- n_dims = 3 if ndim == "3D" else 2
1244
- weights_normalized = (weights["orb"], weights["mm"], weights["ofm"], weights["esen"])
1245
- pca_coords = compute_weighted_pca_cached(weights_normalized, n_dims=n_dims)
1246
 
1247
- # Add dynamic coordinates to DataFrame
1248
- work = work.copy()
1249
- work["PC1_dynamic"] = pca_coords[:, 0]
1250
- work["PC2_dynamic"] = pca_coords[:, 1]
1251
- if n_dims == 3:
1252
- work["PC3_dynamic"] = pca_coords[:, 2]
1253
 
1254
  sub = work if filter_type == "All" else work[work["Type"] == filter_type]
1255
 
@@ -1265,28 +1114,40 @@ def chart3(ndim, color_by, filter_type, topn_str, active_data):
1265
  sub["Cluster"] = sub["Cluster"].astype(str)
1266
  color = "Cluster"
1267
 
1268
- if ndim == "3D":
1269
- fig1 = px.scatter_3d(sub, x="PC1_dynamic", y="PC2_dynamic", z="PC3_dynamic",
 
 
 
 
 
 
 
 
 
 
 
 
 
1270
  color=color, template="plotly_white",
1271
  title=T["chart_pca_3d"],
1272
  opacity=0.6, height=600)
1273
  fig1.update_traces(marker_size=2)
1274
- else:
1275
- fig1 = px.scatter(sub, x="PC1_dynamic", y="PC2_dynamic",
 
 
 
 
 
1276
  color=color, template="plotly_white",
1277
  title=T["chart_pca_2d"],
1278
  render_mode="webgl", opacity=0.6, height=600)
1279
  fig1.update_traces(marker_size=3)
1280
-
1281
- # Update axis labels to reflect weighted nature
1282
- fig1.update_xaxes(title_text=T.get("lbl_weighted_pc1", "Weighted PC1 (Physics-Adaptive)"))
1283
- fig1.update_yaxes(title_text=T.get("lbl_weighted_pc2", "Weighted PC2 (Physics-Adaptive)"))
1284
- if ndim == "3D":
1285
- fig1.update_layout(scene=dict(
1286
- xaxis_title=T.get("lbl_weighted_pc1", "Weighted PC1"),
1287
- yaxis_title=T.get("lbl_weighted_pc2", "Weighted PC2"),
1288
- zaxis_title=T.get("lbl_weighted_pc3", "Weighted PC3")
1289
- ))
1290
 
1291
  fig1.update_layout(legend=dict(font=dict(size=9)))
1292
 
@@ -1307,20 +1168,41 @@ def chart3(ndim, color_by, filter_type, topn_str, active_data):
1307
  Output("c3-variance-container", "style"),
1308
  Output("c3-var-toggle", "children"),
1309
  Input("c3-var-toggle", "n_clicks"),
1310
- prevent_initial_call=True,
1311
  )
1312
- def toggle_variance(n):
1313
- if n and n % 2 == 1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1314
  return (
1315
  {"flex": "2", "minWidth": 0},
1316
  {"flex": "1", "minWidth": 0},
1317
  T["chart_variance"] + " \u25c0",
1318
  )
1319
- return (
1320
- {"flex": "1", "minWidth": 0},
1321
- {"flex": "1", "minWidth": 0, "display": "none"},
1322
- T["chart_variance"] + " \u25b6",
1323
- )
 
1324
 
1325
 
1326
  # ── 4 Lookup table ────────────────────────────────────────────────────────
 
16
  import json
17
  import os
18
  import sys
 
19
  from pathlib import Path
20
 
21
  from dotenv import load_dotenv
 
55
  ),
56
  "c1_title": "Band gap distribution over material families",
57
  "c2_title": "Compare band gap distribution of material types",
58
+ "c3_title": "Material Embedding Space Visualization",
59
+ "c3_desc": "Visualize via supervised UMAP (preset mode) or PCA (custom/fallback).",
60
  "c4_title": "Look up materials by chemical family",
61
  "c5_title": "Band gap by material family and material type",
62
  "c6_title": "Show the top N material families",
 
100
  "chart_type_avg_cluster": "{type} — Avg Band Gap by Cluster",
101
  "chart_pca_2d": "PCA 2D Projection",
102
  "chart_pca_3d": "PCA 3D Projection",
103
+ "chart_umap_2d": "UMAP 2D Projection (Supervised)",
104
+ "lbl_projection": "Projection Method",
 
105
  "chart_variance": "Explained Variance (%)",
106
  "chart_component": "Component",
107
  "chart_variance_pct": "Variance (%)",
 
157
  ),
158
  "c1_title": "材料ファミリーごとのバンドギャップ分布",
159
  "c2_title": "材料タイプ別バンドギャップ分布の比較",
160
+ "c3_title": "材料埋め込み空間可視化",
161
+ "c3_desc": "教師付きUMAP(プリセット)またはPCA(カスタム/フォルバック)で可視化。",
162
  "c4_title": "化学ファミリーで材料を検索",
163
  "c5_title": "材料ファミリーと材料タイプ別バンドギャップ",
164
  "c6_title": "上位N材料ファミリーを表示",
 
202
  "chart_type_avg_cluster": "{type} — クラスター別平均バンドギャップ",
203
  "chart_pca_2d": "PCA 2D射影",
204
  "chart_pca_3d": "PCA 3D射影",
205
+ "chart_umap_2d": "UMAP 2D射影(教師付き)",
206
+ "lbl_projection": "射影法",
 
207
  "chart_variance": "寄与率(%)",
208
  "chart_component": "成分",
209
  "chart_variance_pct": "寄与率(%)",
 
289
  DIM_COLS = [f"dim_{i}" for i in range(32)]
290
  ALL_TYPES = ["Metallic", "Semiconductor", "Insulator"]
291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  # Compute PCA on balanced 32D UMAP for variance bar chart
 
293
  X = StandardScaler().fit_transform(df[DIM_COLS].values)
294
  pca_full = PCA(n_components=20, random_state=42).fit(X)
295
  pc_all = pca_full.transform(X) # (33973, 20)
 
387
  else:
388
  print(" WARNING: Centroid similarity matrices not loaded. Custom mode unavailable.")
389
 
390
# ── Load per-preset UMAP 2D coordinates ────────────────────────────────
# Each preset key may have an optional ``umap_2d_<key>.npy`` file in CACHE.
# Files that are missing, unreadable, or mis-shaped are skipped so the app
# still starts and chart 3 falls back to PCA instead of crashing later.
print("Loading per-preset UMAP 2D projections...")
PRESET_UMAP_2D = {}
for key in PRESET_KEYS:
    umap_path = CACHE / f"umap_2d_{key}.npy"
    if not umap_path.exists():
        continue

    try:
        coords = np.load(umap_path)
    except (OSError, ValueError, EOFError) as exc:
        # np.load rejects files that are not valid .npy data, e.g. a Git LFS
        # pointer stub that was never fetched; treat that like a missing file.
        print(f" WARNING: Could not read {umap_path.name}: {exc}")
        continue

    # chart3 reads columns 0 and 1 and assigns them onto a frame derived from
    # df, so the array must be 2-D with one row per df row.
    if coords.ndim != 2 or coords.shape[0] != len(df) or coords.shape[1] < 2:
        print(f" WARNING: Skipping {umap_path.name}: unexpected shape {coords.shape}")
        continue

    PRESET_UMAP_2D[key] = coords.astype(np.float32)

if PRESET_UMAP_2D:
    total_mb = sum(v.nbytes for v in PRESET_UMAP_2D.values()) / (1024*1024)
    print(f" Loaded {len(PRESET_UMAP_2D)} UMAP 2D projections (~{total_mb:.1f} MB)")
else:
    print(" WARNING: No UMAP 2D files found. Will use PCA fallback.")

# True when at least one usable projection was loaded; the layout uses this
# to decide whether to offer the "UMAP 2D" option at all.
HAS_UMAP = bool(PRESET_UMAP_2D)
404
 
405
  # ── Label resolution ───────────────────────────────────────────────────
406
 
 
613
  dcc.Store(id="active-labels",
614
  data={"mode": "preset", "key": "balanced"}),
615
 
 
 
 
 
616
  # ── Clustering control panel ────────────────────────────────────
617
  html.Div(style={**CARD, "borderLeft": "4px solid #1976d2"}, children=[
618
  html.H3(T["lbl_cluster_mode"],
 
678
  html.Div(style={"display": "flex", "gap": "16px",
679
  "flexWrap": "wrap", "marginBottom": "12px"}, children=[
680
  html.Div([
681
+ html.Div(T["lbl_projection"], style=LABEL),
682
+ dcc.Dropdown(
683
+ id="c3-ndim",
684
+ options=(
685
+ [{"label": "UMAP 2D", "value": "UMAP_2D"}]
686
+ if HAS_UMAP else []
687
+ ) + [
688
+ {"label": "PCA 2D", "value": "PCA_2D"},
689
+ {"label": "PCA 3D", "value": "PCA_3D"},
690
+ ],
691
+ value="UMAP_2D" if HAS_UMAP else "PCA_2D",
692
+ clearable=False,
693
+ style={"width": "150px"}
694
+ ),
695
+ ], style={"display": "flex", "gap": "8px", "alignItems": "center"}),
696
  html.Div([
697
  html.Div(T["lbl_color_by"], style=LABEL),
698
  dcc.Dropdown(id="c3-color", options=color_by_options,
 
725
  ]),
726
  html.Div(style={"display": "flex", "gap": "16px"}, children=[
727
  html.Div(
728
+ dcc.Graph(id="c3-scatter"),
 
 
 
 
729
  id="c3-scatter-container",
730
  style={"flex": "1", "minWidth": 0}
731
  ),
732
  html.Div(
733
+ dcc.Graph(id="c3-variance"),
 
 
 
 
734
  id="c3-variance-container",
735
  style={"flex": "1", "minWidth": 0, "display": "none"}
736
  ),
 
970
  # CALLBACKS
971
  # ══════════════════════════════════════════════════════════════════════════
972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
973
 
974
  # ── Active labels computation (preset mode or initial) ────────────────
975
  @callback(
 
1083
  clusters, families, displays = resolve_labels(active_data)
1084
  work = df.assign(Cluster=clusters, Family=families, FamilyDisplay=displays)
1085
 
1086
+ # ===== Assign UMAP 2D coordinates if using UMAP mode =====
1087
+ if ndim == "UMAP_2D" and HAS_UMAP:
1088
+ # Determine which preset's UMAP to use
1089
+ preset_key = "balanced" # Default fallback
1090
+
1091
+ if active_data and active_data.get("mode") == "preset":
1092
+ preset_key = active_data.get("key", "balanced")
1093
+ # Custom mode: use balanced UMAP for fixed layout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1094
 
1095
+ # Load the appropriate UMAP coordinates
1096
+ umap_2d = PRESET_UMAP_2D.get(preset_key, PRESET_UMAP_2D.get("balanced"))
 
 
1097
 
1098
+ # Assign to DataFrame
1099
+ work = work.copy()
1100
+ work["UMAP_X"] = umap_2d[:, 0]
1101
+ work["UMAP_Y"] = umap_2d[:, 1]
 
 
1102
 
1103
  sub = work if filter_type == "All" else work[work["Type"] == filter_type]
1104
 
 
1114
  sub["Cluster"] = sub["Cluster"].astype(str)
1115
  color = "Cluster"
1116
 
1117
+ # ===== Generate scatter plot based on projection mode =====
1118
+ if ndim == "UMAP_2D":
1119
+ fig1 = px.scatter(
1120
+ sub, x="UMAP_X", y="UMAP_Y",
1121
+ color=color, template="plotly_white",
1122
+ title=T["chart_umap_2d"],
1123
+ render_mode="webgl", opacity=0.6, height=600
1124
+ )
1125
+ fig1.update_traces(marker_size=3)
1126
+ fig1.update_layout(
1127
+ xaxis_title="UMAP 1",
1128
+ yaxis_title="UMAP 2"
1129
+ )
1130
+ elif ndim == "PCA_3D":
1131
+ fig1 = px.scatter_3d(sub, x="PC1", y="PC2", z="PC3",
1132
  color=color, template="plotly_white",
1133
  title=T["chart_pca_3d"],
1134
  opacity=0.6, height=600)
1135
  fig1.update_traces(marker_size=2)
1136
+ fig1.update_layout(scene=dict(
1137
+ xaxis_title="PC1",
1138
+ yaxis_title="PC2",
1139
+ zaxis_title="PC3"
1140
+ ))
1141
+ else: # PCA_2D
1142
+ fig1 = px.scatter(sub, x="PC1", y="PC2",
1143
  color=color, template="plotly_white",
1144
  title=T["chart_pca_2d"],
1145
  render_mode="webgl", opacity=0.6, height=600)
1146
  fig1.update_traces(marker_size=3)
1147
+ fig1.update_layout(
1148
+ xaxis_title="PC1",
1149
+ yaxis_title="PC2"
1150
+ )
 
 
 
 
 
 
1151
 
1152
  fig1.update_layout(legend=dict(font=dict(size=9)))
1153
 
 
1168
  Output("c3-variance-container", "style"),
1169
  Output("c3-var-toggle", "children"),
1170
  Input("c3-var-toggle", "n_clicks"),
1171
+ Input("c3-ndim", "value"),
1172
  )
1173
def toggle_variance(n, ndim):
    """Show or hide the explained-variance panel beside the chart-3 scatter.

    Args:
        n: Click count of the ``c3-var-toggle`` button (``None`` until the
            first click).
        ndim: Current value of the ``c3-ndim`` projection dropdown.

    Returns:
        A 3-tuple of (first container style, variance container style,
        toggle button label). The first style presumably targets the scatter
        container; its ``Output`` is declared above the visible decorator
        lines, so confirm against the full decorator.
    """
    hidden = (
        {"flex": "1", "minWidth": 0},
        {"flex": "1", "minWidth": 0, "display": "none"},
        T["chart_variance"] + " \u25b6",
    )
    shown = (
        {"flex": "2", "minWidth": 0},
        {"flex": "1", "minWidth": 0},
        T["chart_variance"] + " \u25c0",
    )

    # Explained variance is not meaningful for UMAP, so always hide it there.
    if ndim == "UMAP_2D":
        return hidden

    # Initial render or a projection change (not a button click): start hidden.
    ctx = dash.callback_context
    if not ctx.triggered or ctx.triggered[0]["prop_id"] == "c3-ndim.value":
        return hidden

    # Button click: an odd click count opens the panel, an even one closes it.
    # The previous check used even == show, so the very first click (n == 1)
    # left the panel hidden.
    # NOTE(review): click parity can still drift from the on-screen state
    # after a projection change hides the panel; fixing that needs the
    # current container style as a State in the decorator.
    return shown if (n or 0) % 2 == 1 else hidden
1206
 
1207
 
1208
  # ── 4 Lookup table ────────────────────────────────────────────────────────
material_universe_cache/plotly_studio_export.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25ff56bbfd44a4d2d7db5af566fe2990d780a9c696ba2f50ca10ba8dfdb5e6d6
3
- size 20636286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28d5a2616fc8dcd9469c0303f64b4eb61bb8c9bb07c8f614281076a93e87113e
3
+ size 13630877
material_universe_cache/umap_2d_balanced.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:677b4fcdc6bd77071d7d8cc0608690f07cfbd34fceb44b75cf149a1ee375c5e9
3
+ size 271912
material_universe_cache/umap_2d_chemical.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d1f19ef7c3de86c290a95a9795e3da53ceaec3ac7376ceb8b18953e80d2f952
3
+ size 271912
material_universe_cache/umap_2d_coord_energy.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69df0550f20b0afff1f02c81fa216dc07b808ea22b9a2e98e4d441db4a692b89
3
+ size 271912
material_universe_cache/umap_2d_electronic.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473c16cea222156875cc5200b74fc21f835b7438c79b605901a6b6b023047b95
3
+ size 271912
material_universe_cache/umap_2d_esen_only.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd018765d5116fe982881e69fde569cabd1ca72ab5e418948e6dbb703d142fa
3
+ size 271912
material_universe_cache/umap_2d_mechanochem.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:607a5d46e9bb74c38cdf1e87648e4bf2414811a6d56a9cc63d318ca73b10e93a
3
+ size 271912
material_universe_cache/umap_2d_mm_only.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:990370651b0f798c0271428535b6dc09003138cd1ff7685146f65202aaecab40
3
+ size 271912
material_universe_cache/umap_2d_ofm_only.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80133c0680f7028b2007745f1da99cb47cfceaee942f3e2b076a016ee2cdf0d9
3
+ size 271912
material_universe_cache/umap_2d_orb_only.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65be8b7d5e99d94f6f7b07bb9c5df689a5a9ccb2e18c58f2f6e52e2e0b471f39
3
+ size 271912
material_universe_cache/umap_2d_stability.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4653177a621333abe5bf0d219c915f1e6552ab9718da0e0921a74839346c991d
3
+ size 271912
material_universe_cache/umap_2d_structural.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32874494dd779ca1674029c00db116144748118b44c14faab82686f93e518b87
3
+ size 271912