Spaces:
Running
Running
“Namhyun-Kim”
committed on
Commit
·
a073e0e
1
Parent(s):
87b6361
Clamp PCA and raise t-SNE cap
Browse files
app.py
CHANGED
|
@@ -226,8 +226,8 @@ def plot_tsne(tech_filter, snr_filter, mod_filter, mob_filter, representation, c
|
|
| 226 |
filtered_df["y"] = filtered_df[tsne_cols[1]]
|
| 227 |
else:
|
| 228 |
sampled_df = filtered_df
|
| 229 |
-
if len(sampled_df) >
|
| 230 |
-
sampled_df = sampled_df.sample(n=
|
| 231 |
sampled_df = sampled_df.copy()
|
| 232 |
|
| 233 |
if representation == "LWM Embedding":
|
|
@@ -288,9 +288,27 @@ def build_raw_feature_matrix(samples: pd.Series, max_components: int = 256) -> n
|
|
| 288 |
matrix = np.nan_to_num(matrix, copy=False)
|
| 289 |
scaler = StandardScaler()
|
| 290 |
matrix = scaler.fit_transform(matrix)
|
| 291 |
-
if max_components
|
| 292 |
-
|
| 293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
return matrix
|
| 295 |
|
| 296 |
|
|
@@ -598,6 +616,8 @@ COLOR_OPTIONS: Dict[str, str] = {
|
|
| 598 |
"Mobility": "mob",
|
| 599 |
}
|
| 600 |
|
|
|
|
|
|
|
| 601 |
default_tech = tech_choices[:1] if tech_choices else []
|
| 602 |
initial_spec_mod_choices = TECH_TO_MODS.get(default_tech[0], mod_choices) if default_tech else mod_choices
|
| 603 |
|
|
|
|
| 226 |
filtered_df["y"] = filtered_df[tsne_cols[1]]
|
| 227 |
else:
|
| 228 |
sampled_df = filtered_df
|
| 229 |
+
if len(sampled_df) > TSNE_SAMPLE_CAP:
|
| 230 |
+
sampled_df = sampled_df.sample(n=TSNE_SAMPLE_CAP, random_state=42)
|
| 231 |
sampled_df = sampled_df.copy()
|
| 232 |
|
| 233 |
if representation == "LWM Embedding":
|
|
|
|
| 288 |
matrix = np.nan_to_num(matrix, copy=False)
|
| 289 |
scaler = StandardScaler()
|
| 290 |
matrix = scaler.fit_transform(matrix)
|
| 291 |
+
if max_components:
|
| 292 |
+
# Cap n_components to valid PCA range: <= min(n_samples-1, n_features)
|
| 293 |
+
n_samples, n_features = matrix.shape
|
| 294 |
+
if n_samples > 1:
|
| 295 |
+
max_valid = min(n_features, max(n_samples - 1, 1))
|
| 296 |
+
else:
|
| 297 |
+
max_valid = 1
|
| 298 |
+
target = min(max_components, max_valid)
|
| 299 |
+
if target < 1:
|
| 300 |
+
target = 1
|
| 301 |
+
|
| 302 |
+
if target < n_features:
|
| 303 |
+
projector = PCA(n_components=target, random_state=42)
|
| 304 |
+
try:
|
| 305 |
+
matrix = projector.fit_transform(matrix)
|
| 306 |
+
except ValueError:
|
| 307 |
+
safe_components = max(1, min(n_samples, n_features) - 1)
|
| 308 |
+
safe_components = min(safe_components, target)
|
| 309 |
+
if safe_components >= 1:
|
| 310 |
+
fallback = PCA(n_components=safe_components, random_state=42)
|
| 311 |
+
matrix = fallback.fit_transform(matrix)
|
| 312 |
return matrix
|
| 313 |
|
| 314 |
|
|
|
|
| 616 |
"Mobility": "mob",
|
| 617 |
}
|
| 618 |
|
| 619 |
+
TSNE_SAMPLE_CAP = 6000
|
| 620 |
+
|
| 621 |
default_tech = tech_choices[:1] if tech_choices else []
|
| 622 |
initial_spec_mod_choices = TECH_TO_MODS.get(default_tech[0], mod_choices) if default_tech else mod_choices
|
| 623 |
|