“Namhyun-Kim” committed on
Commit
a073e0e
·
1 Parent(s): 87b6361

Clamp PCA and raise t-SNE cap

Browse files
Files changed (1) hide show
  1. app.py +25 -5
app.py CHANGED
@@ -226,8 +226,8 @@ def plot_tsne(tech_filter, snr_filter, mod_filter, mob_filter, representation, c
226
  filtered_df["y"] = filtered_df[tsne_cols[1]]
227
  else:
228
  sampled_df = filtered_df
229
- if len(sampled_df) > 1200:
230
- sampled_df = sampled_df.sample(n=1200, random_state=42)
231
  sampled_df = sampled_df.copy()
232
 
233
  if representation == "LWM Embedding":
@@ -288,9 +288,27 @@ def build_raw_feature_matrix(samples: pd.Series, max_components: int = 256) -> n
288
  matrix = np.nan_to_num(matrix, copy=False)
289
  scaler = StandardScaler()
290
  matrix = scaler.fit_transform(matrix)
291
- if max_components and matrix.shape[1] > max_components:
292
- projector = PCA(n_components=max_components, random_state=42)
293
- matrix = projector.fit_transform(matrix)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  return matrix
295
 
296
 
@@ -598,6 +616,8 @@ COLOR_OPTIONS: Dict[str, str] = {
598
  "Mobility": "mob",
599
  }
600
 
 
 
601
  default_tech = tech_choices[:1] if tech_choices else []
602
  initial_spec_mod_choices = TECH_TO_MODS.get(default_tech[0], mod_choices) if default_tech else mod_choices
603
 
 
226
  filtered_df["y"] = filtered_df[tsne_cols[1]]
227
  else:
228
  sampled_df = filtered_df
229
+ if len(sampled_df) > TSNE_SAMPLE_CAP:
230
+ sampled_df = sampled_df.sample(n=TSNE_SAMPLE_CAP, random_state=42)
231
  sampled_df = sampled_df.copy()
232
 
233
  if representation == "LWM Embedding":
 
288
  matrix = np.nan_to_num(matrix, copy=False)
289
  scaler = StandardScaler()
290
  matrix = scaler.fit_transform(matrix)
291
+ if max_components:
292
+ # Cap n_components to valid PCA range: <= min(n_samples-1, n_features)
293
+ n_samples, n_features = matrix.shape
294
+ if n_samples > 1:
295
+ max_valid = min(n_features, max(n_samples - 1, 1))
296
+ else:
297
+ max_valid = 1
298
+ target = min(max_components, max_valid)
299
+ if target < 1:
300
+ target = 1
301
+
302
+ if target < n_features:
303
+ projector = PCA(n_components=target, random_state=42)
304
+ try:
305
+ matrix = projector.fit_transform(matrix)
306
+ except ValueError:
307
+ safe_components = max(1, min(n_samples, n_features) - 1)
308
+ safe_components = min(safe_components, target)
309
+ if safe_components >= 1:
310
+ fallback = PCA(n_components=safe_components, random_state=42)
311
+ matrix = fallback.fit_transform(matrix)
312
  return matrix
313
 
314
 
 
616
  "Mobility": "mob",
617
  }
618
 
619
+ TSNE_SAMPLE_CAP = 6000
620
+
621
  default_tech = tech_choices[:1] if tech_choices else []
622
  initial_spec_mod_choices = TECH_TO_MODS.get(default_tech[0], mod_choices) if default_tech else mod_choices
623