Spaces:

wi-lab
/

LWM-Spectro

Running

App Files Files Community

“Namhyun-Kim” committed on Dec 2, 2025

Commit

a073e0e

1 Parent(s): 87b6361

Clamp PCA and raise t-SNE cap

Browse files

Files changed (1) hide show

app.py +25 -5

app.py CHANGED Viewed

@@ -226,8 +226,8 @@ def plot_tsne(tech_filter, snr_filter, mod_filter, mob_filter, representation, c
         filtered_df["y"] = filtered_df[tsne_cols[1]]
     else:
         sampled_df = filtered_df
-        if len(sampled_df) > 1200:
-            sampled_df = sampled_df.sample(n=1200, random_state=42)
         sampled_df = sampled_df.copy()
         if representation == "LWM Embedding":
@@ -288,9 +288,27 @@ def build_raw_feature_matrix(samples: pd.Series, max_components: int = 256) -> n
     matrix = np.nan_to_num(matrix, copy=False)
     scaler = StandardScaler()
     matrix = scaler.fit_transform(matrix)
-    if max_components and matrix.shape[1] > max_components:
-        projector = PCA(n_components=max_components, random_state=42)
-        matrix = projector.fit_transform(matrix)
     return matrix
@@ -598,6 +616,8 @@ COLOR_OPTIONS: Dict[str, str] = {
     "Mobility": "mob",
 }
 default_tech = tech_choices[:1] if tech_choices else []
 initial_spec_mod_choices = TECH_TO_MODS.get(default_tech[0], mod_choices) if default_tech else mod_choices

         filtered_df["y"] = filtered_df[tsne_cols[1]]
     else:
         sampled_df = filtered_df
+        if len(sampled_df) > TSNE_SAMPLE_CAP:
+            sampled_df = sampled_df.sample(n=TSNE_SAMPLE_CAP, random_state=42)
         sampled_df = sampled_df.copy()
         if representation == "LWM Embedding":
     matrix = np.nan_to_num(matrix, copy=False)
     scaler = StandardScaler()
     matrix = scaler.fit_transform(matrix)
+    if max_components:
+        # Cap n_components to valid PCA range: <= min(n_samples-1, n_features)
+        n_samples, n_features = matrix.shape
+        if n_samples > 1:
+            max_valid = min(n_features, max(n_samples - 1, 1))
+        else:
+            max_valid = 1
+        target = min(max_components, max_valid)
+        if target < 1:
+            target = 1
+        if target < n_features:
+            projector = PCA(n_components=target, random_state=42)
+            try:
+                matrix = projector.fit_transform(matrix)
+            except ValueError:
+                safe_components = max(1, min(n_samples, n_features) - 1)
+                safe_components = min(safe_components, target)
+                if safe_components >= 1:
+                    fallback = PCA(n_components=safe_components, random_state=42)
+                    matrix = fallback.fit_transform(matrix)
     return matrix
     "Mobility": "mob",
 }
+TSNE_SAMPLE_CAP = 6000
 default_tech = tech_choices[:1] if tech_choices else []
 initial_spec_mod_choices = TECH_TO_MODS.get(default_tech[0], mod_choices) if default_tech else mod_choices