Spaces:
Sleeping
Sleeping
Commit ·
8193db9
1
Parent(s): f800c3b
fix: handle inhomogeneous fold counts in statistical analysis
Browse files
- webapp/benchmark.py +15 -6
webapp/benchmark.py
CHANGED
|
@@ -352,16 +352,25 @@ def _statistical_analysis(results: dict, task: str) -> dict:
|
|
| 352 |
return {}
|
| 353 |
|
| 354 |
# Extract scores per fold for each model
|
| 355 |
-
#
|
| 356 |
-
|
| 357 |
-
|
| 358 |
for name in model_names:
|
| 359 |
folds = results[name].get("folds", [])
|
| 360 |
-
|
| 361 |
scores = [f.get(primary, 0) for f in folds]
|
| 362 |
-
|
|
|
|
| 363 |
|
| 364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
# Calculate ranks for each fold (row)
|
| 367 |
# Higher score = lower rank (1 is best). Using method='min' for competition ranking (ties get same best rank)
|
|
|
|
| 352 |
return {}
|
| 353 |
|
| 354 |
# Extract scores per fold for each model
|
| 355 |
+
# Only include models that have a consistent number of folds
|
| 356 |
+
temp_matrix = {}
|
| 357 |
+
max_folds = 0
|
| 358 |
for name in model_names:
|
| 359 |
folds = results[name].get("folds", [])
|
| 360 |
+
if not folds: continue
|
| 361 |
scores = [f.get(primary, 0) for f in folds]
|
| 362 |
+
temp_matrix[name] = scores
|
| 363 |
+
max_folds = max(max_folds, len(scores))
|
| 364 |
|
| 365 |
+
if max_folds == 0: return {}
|
| 366 |
+
|
| 367 |
+
# Final list of models that have the full fold count
|
| 368 |
+
valid_names = [n for n, s in temp_matrix.items() if len(s) == max_folds]
|
| 369 |
+
if len(valid_names) < 2: return {}
|
| 370 |
+
|
| 371 |
+
matrix = np.array([temp_matrix[n] for n in valid_names]).T # Shape: (n_folds, n_models)
|
| 372 |
+
n_folds = max_folds
|
| 373 |
+
model_names = valid_names # Update model_names to match matrix columns
|
| 374 |
|
| 375 |
# Calculate ranks for each fold (row)
|
| 376 |
# Higher score = lower rank (1 is best). Using method='min' for competition ranking (ties get same best rank)
|