Spaces:
Sleeping
Sleeping
fix: pandas 3.0 compat — use DataFrame.sample() instead of groupby().apply()
Browse files
app.py
CHANGED
|
@@ -890,21 +890,11 @@ def chart3(ndim, color_by, filter_type, topn_str, active_data):
|
|
| 890 |
sub["Cluster"] = sub["Cluster"].astype(str)
|
| 891 |
color = "Cluster"
|
| 892 |
|
| 893 |
-
# Subsample for browser performance: cap at MAX_SCATTER points
|
| 894 |
-
# while preserving the distribution across colour groups.
|
| 895 |
MAX_SCATTER = 30_000
|
| 896 |
n_total = len(sub)
|
| 897 |
if n_total > MAX_SCATTER:
|
| 898 |
-
|
| 899 |
-
sub = (
|
| 900 |
-
sub.groupby(group_col, observed=True, group_keys=False)
|
| 901 |
-
.apply(
|
| 902 |
-
lambda g: g.sample(
|
| 903 |
-
max(1, round(MAX_SCATTER * len(g) / n_total)),
|
| 904 |
-
random_state=42,
|
| 905 |
-
)
|
| 906 |
-
)
|
| 907 |
-
)
|
| 908 |
|
| 909 |
scatter_title_suffix = (
|
| 910 |
f" ({len(sub):,} / {n_total:,})" if n_total > MAX_SCATTER else ""
|
|
|
|
| 890 |
sub["Cluster"] = sub["Cluster"].astype(str)
|
| 891 |
color = "Cluster"
|
| 892 |
|
| 893 |
+
# Subsample for browser performance: cap at MAX_SCATTER points (uniform random draw with a fixed seed; not stratified by colour group).
|
|
|
|
| 894 |
MAX_SCATTER = 30_000
|
| 895 |
n_total = len(sub)
|
| 896 |
if n_total > MAX_SCATTER:
|
| 897 |
+
sub = sub.sample(MAX_SCATTER, random_state=42)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 898 |
|
| 899 |
scatter_title_suffix = (
|
| 900 |
f" ({len(sub):,} / {n_total:,})" if n_total > MAX_SCATTER else ""
|