Spaces:

wi-lab
/

LWM-Spectro

Running

App Files Community

Namhyun Kim committed 4 days ago

Commit

5b413ef

1 Parent(s): 1a85ed1

Fix t-SNE blank plots (infer embedding dims)

Browse files

Files changed (1) hide show

app.py +51 -7

app.py CHANGED Viewed

@@ -361,17 +361,47 @@ def apply_filters(
 def _select_tech_embedding(flat_embedding: np.ndarray | None, tech: str, embed_dim: Optional[int]) -> Optional[np.ndarray]:
-    if flat_embedding is None or embed_dim is None:
         return None
     total = flat_embedding.size
     blocks = len(TECH_EXPERT_ORDER)
-    if total % blocks != 0:
         return None
     try:
-        arr = flat_embedding.reshape(blocks, embed_dim)
     except ValueError:
         return None
-    tech_idx = TECH_TO_EXPERT_IDX.get(tech)
     if tech_idx is None or tech_idx >= arr.shape[0]:
         return arr.mean(axis=0)
     return arr[tech_idx]
@@ -416,7 +446,13 @@ def plot_tsne(
     filtered_df = apply_filters(df, tech_filter, snr_filter, mod_filter, mob_filter)
     sampled_df = _sample_balanced_by_snr(filtered_df, samples_per_snr, sampling_seed)
     if len(sampled_df) < 5:
-        return None
     sampled_df = sampled_df.copy()
     color_column = COLOR_OPTIONS.get(color_label, "snr")
@@ -424,14 +460,22 @@ def plot_tsne(
     if representation == "LWM Embedding":
         embed_mask = sampled_df["tech_embedding"].apply(lambda x: x is not None)
         if embed_mask.sum() < 5:
-            return None
         sampled_df = sampled_df.loc[embed_mask].reset_index(drop=True)
         features = np.stack(sampled_df["tech_embedding"].values)
     else:
         features = build_tsne_raw_vectors(sampled_df["spectrogram"])
     if features.size == 0:
-        return None
     features = _standardize_for_tsne(features)

 def _select_tech_embedding(flat_embedding: np.ndarray | None, tech: str, embed_dim: Optional[int]) -> Optional[np.ndarray]:
+    """Extract the technology-specific expert embedding.
+    Some artifacts don't include an explicit embedding dimension hint. In that case,
+    infer `embed_dim = total_dim / num_experts` when divisible.
+    """
+    if flat_embedding is None:
         return None
+    flat_embedding = np.asarray(flat_embedding).reshape(-1)
     total = flat_embedding.size
     blocks = len(TECH_EXPERT_ORDER)
+    if blocks <= 0:
+        return None
+    inferred_dim = embed_dim
+    if inferred_dim is None:
+        if total % blocks != 0:
+            return None
+        inferred_dim = total // blocks
+    try:
+        inferred_dim = int(inferred_dim)
+    except (TypeError, ValueError):
         return None
+    if inferred_dim <= 0:
+        return None
+    expected = blocks * inferred_dim
+    if expected != total:
+        # If metadata is wrong, don't crash; fall back to an even split only if possible.
+        if total % blocks != 0:
+            return None
+        inferred_dim = total // blocks
     try:
+        arr = flat_embedding.reshape(blocks, inferred_dim)
     except ValueError:
         return None
+    tech_idx = TECH_TO_EXPERT_IDX.get(str(tech))
     if tech_idx is None or tech_idx >= arr.shape[0]:
         return arr.mean(axis=0)
     return arr[tech_idx]
     filtered_df = apply_filters(df, tech_filter, snr_filter, mod_filter, mob_filter)
     sampled_df = _sample_balanced_by_snr(filtered_df, samples_per_snr, sampling_seed)
     if len(sampled_df) < 5:
+        fig = go.Figure()
+        fig.update_layout(
+            title=f"Not enough samples to plot (n={len(sampled_df)}). Widen filters or increase samples.",
+            xaxis=dict(visible=False),
+            yaxis=dict(visible=False),
+        )
+        return fig
     sampled_df = sampled_df.copy()
     color_column = COLOR_OPTIONS.get(color_label, "snr")
     if representation == "LWM Embedding":
         embed_mask = sampled_df["tech_embedding"].apply(lambda x: x is not None)
         if embed_mask.sum() < 5:
+            fig = go.Figure()
+            fig.update_layout(
+                title="No per-technology embeddings found for the selected filters.",
+                xaxis=dict(visible=False),
+                yaxis=dict(visible=False),
+            )
+            return fig
         sampled_df = sampled_df.loc[embed_mask].reset_index(drop=True)
         features = np.stack(sampled_df["tech_embedding"].values)
     else:
         features = build_tsne_raw_vectors(sampled_df["spectrogram"])
     if features.size == 0:
+        fig = go.Figure()
+        fig.update_layout(title="No features available for t-SNE.", xaxis=dict(visible=False), yaxis=dict(visible=False))
+        return fig
     features = _standardize_for_tsne(features)