Spaces:

Hafnium49
/

material-universe

Sleeping

Hafnium49 committed on Feb 19

Commit

e79014e

verified ·

1 Parent(s): fc0c0a7

fix: pandas 3.0 compat — use DataFrame.sample() instead of groupby().apply()

Files changed (1) hide show

app.py CHANGED Viewed

@@ -890,21 +890,11 @@ def chart3(ndim, color_by, filter_type, topn_str, active_data):
         sub["Cluster"] = sub["Cluster"].astype(str)
         color = "Cluster"
-    # Stratified subsample for browser performance: cap at MAX_SCATTER points
-    # while preserving the distribution across colour groups.
     MAX_SCATTER = 30_000
     n_total = len(sub)
     if n_total > MAX_SCATTER:
-        group_col = color if color and color in sub.columns else "Family"
-        sub = (
-            sub.groupby(group_col, observed=True, group_keys=False)
-            .apply(
-                lambda g: g.sample(
-                    max(1, round(MAX_SCATTER * len(g) / n_total)),
-                    random_state=42,
-                )
-            )
-        )
     scatter_title_suffix = (
         f" ({len(sub):,} / {n_total:,})" if n_total > MAX_SCATTER else ""

         sub["Cluster"] = sub["Cluster"].astype(str)
         color = "Cluster"
+    # Subsample for browser performance: cap at MAX_SCATTER points.
     MAX_SCATTER = 30_000
     n_total = len(sub)
     if n_total > MAX_SCATTER:
+        sub = sub.sample(MAX_SCATTER, random_state=42)
     scatter_title_suffix = (
         f" ({len(sub):,} / {n_total:,})" if n_total > MAX_SCATTER else ""