Hafnium49 committed on
Commit
e79014e
·
verified ·
1 Parent(s): fc0c0a7

fix: pandas 3.0 compat — use DataFrame.sample() instead of groupby().apply()

Browse files
Files changed (1) hide show
  1. app.py +2 -12
app.py CHANGED
@@ -890,21 +890,11 @@ def chart3(ndim, color_by, filter_type, topn_str, active_data):
890
  sub["Cluster"] = sub["Cluster"].astype(str)
891
  color = "Cluster"
892
 
893
- # Stratified subsample for browser performance: cap at MAX_SCATTER points
894
- # while preserving the distribution across colour groups.
895
  MAX_SCATTER = 30_000
896
  n_total = len(sub)
897
  if n_total > MAX_SCATTER:
898
- group_col = color if color and color in sub.columns else "Family"
899
- sub = (
900
- sub.groupby(group_col, observed=True, group_keys=False)
901
- .apply(
902
- lambda g: g.sample(
903
- max(1, round(MAX_SCATTER * len(g) / n_total)),
904
- random_state=42,
905
- )
906
- )
907
- )
908
 
909
  scatter_title_suffix = (
910
  f" ({len(sub):,} / {n_total:,})" if n_total > MAX_SCATTER else ""
 
890
  sub["Cluster"] = sub["Cluster"].astype(str)
891
  color = "Cluster"
892
 
893
+ # Subsample for browser performance: cap at MAX_SCATTER points.
 
894
  MAX_SCATTER = 30_000
895
  n_total = len(sub)
896
  if n_total > MAX_SCATTER:
897
+ sub = sub.sample(MAX_SCATTER, random_state=42)
 
 
 
 
 
 
 
 
 
898
 
899
  scatter_title_suffix = (
900
  f" ({len(sub):,} / {n_total:,})" if n_total > MAX_SCATTER else ""