# server/benchmark_store.py
# Persists benchmark results to disk so they survive server restarts.
# Used by both inference.py (CLI) and web_ui.py (frontend).
import json
import os
from datetime import datetime, timezone
from typing import Dict, List
# Absolute path of the JSON store: <repo_root>/results/run_history.json
# (two dirname() calls climb from server/benchmark_store.py to the repo root).
_STORE_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'results', 'run_history.json'
)
# Create the results directory at import time so later writes never fail on a
# missing directory (deliberate import-time side effect; exist_ok makes it
# idempotent across repeated imports).
os.makedirs(os.path.dirname(_STORE_PATH), exist_ok=True)
def _load() -> List[Dict]:
    """Load all benchmark results from disk.

    Returns:
        The list stored in ``_STORE_PATH``, or ``[]`` when the file is
        missing, unreadable, contains malformed JSON, or holds something
        other than a JSON list.
    """
    # EAFP: open directly rather than exists()+open() — the two-step check
    # is racy if the file is created or removed between the calls.
    # FileNotFoundError is an OSError subclass, so OSError alone covers the
    # missing-file case as well as permission/IO errors (IOError is merely
    # a legacy alias of OSError).
    try:
        with open(_STORE_PATH, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (json.JSONDecodeError, OSError):
        # A corrupt or unreadable store is treated as empty rather than
        # crashing — callers always expect a list.
        return []
    return data if isinstance(data, list) else []
def _save(results: List[Dict]) -> None:
    """Persist all benchmark results to disk atomically.

    Writes to a temporary sibling file and then swaps it in with
    ``os.replace``, so a crash mid-write can never leave a truncated or
    corrupt ``run_history.json`` behind. Failures are logged, not raised:
    persistence is best-effort and must not abort a benchmark run.
    """
    tmp_path = _STORE_PATH + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            # default=str keeps non-JSON-native values (e.g. datetimes)
            # serializable instead of raising TypeError.
            json.dump(results, f, indent=2, default=str)
        # Atomic on both POSIX and Windows.
        os.replace(tmp_path, _STORE_PATH)
    except OSError as e:  # IOError is an alias of OSError
        print(f"[benchmark_store] WARNING: Could not save results: {e}")
def append_result(model: str, model_id: str, scores: Dict[str, float]) -> Dict:
    """Add a new benchmark result and persist to disk. Returns the saved entry."""
    # max(..., 1) guards against ZeroDivisionError for an empty scores dict.
    avg = round(sum(scores.values()) / max(len(scores), 1), 4)
    entry = {
        'model_name': model,
        'model_id': model_id,
        'scores': scores,
        'average': avg,
        'type': 'full_run',
        # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
        # timestamp instead. ISO-8601 strings still sort chronologically, so
        # get_all()'s string sort keeps working for old (naive) and new
        # (aware, "+00:00"-suffixed) entries alike.
        'timestamp': datetime.now(timezone.utc).isoformat(),
    }
    results = _load()
    results.append(entry)
    _save(results)
    return entry
def get_all() -> List[Dict]:
    """Return all benchmark results, newest first."""
    entries = _load()
    # Backfill keys used by older on-disk entries so consumers see a single
    # schema: 'avg' -> 'average', 'model' -> 'model_name'.
    for entry in entries:
        if 'average' not in entry and 'avg' in entry:
            entry['average'] = entry['avg']
        if 'model_name' not in entry and 'model' in entry:
            entry['model_name'] = entry['model']
    # Missing timestamps sort as '' (oldest) under the descending order.
    entries.sort(key=lambda e: e.get('timestamp', ''), reverse=True)
    return entries
def get_leaderboard() -> List[Dict]:
    """Return deduplicated leaderboard: best score per model_id."""
    def _score(entry: Dict) -> float:
        # Older entries stored the mean under 'avg' instead of 'average'.
        return entry.get('average', entry.get('avg', 0))

    best: Dict[str, Dict] = {}
    for entry in _load():
        # Fall back through legacy identifier keys for old entries.
        key = entry.get('model_id', entry.get('model_name',
                        entry.get('model', 'unknown')))
        current = best.get(key)
        # First sighting always wins; afterwards only a strictly higher
        # score replaces the stored entry (ties keep the earlier run).
        if current is None or _score(entry) > _score(current):
            best[key] = entry
    return sorted(best.values(), key=_score, reverse=True)
|