Spaces:
Runtime error
Runtime error
"""
ChessEcon Backend — Training Status Router
REST endpoints for monitoring training progress.
The actual training runs in the separate training/ service.
"""
| from __future__ import annotations | |
| import os | |
| import json | |
| import glob | |
| import logging | |
| from pathlib import Path | |
| from fastapi import APIRouter, HTTPException | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for the training-monitoring endpoints; everything registered on it
# is served under the /api/training prefix.
router = APIRouter(prefix="/api/training", tags=["training"])

# Filesystem locations read by the handlers below. Both can be overridden
# through the environment and otherwise fall back to paths relative to the
# working directory.
CHECKPOINT_DIR = os.environ.get("CHECKPOINT_DIR", "./training/checkpoints")
SELFPLAY_DATA_DIR = os.environ.get("SELFPLAY_DATA_DIR", "./training/data")
async def training_status():
    """Return current training status from the checkpoint directory.

    Scans ``CHECKPOINT_DIR`` for entries matching ``step_*`` and treats the
    most recently modified one as the latest checkpoint.

    Returns:
        dict: When the checkpoint directory does not exist, a dict with
        ``status`` (``"not_started"``), ``checkpoints`` (empty list) and
        ``latest_step`` (``0``). Otherwise a dict with:

        * ``status`` -- ``"running"`` if at least one checkpoint was found,
          else ``"not_started"``.
        * ``latest_step`` -- integer parsed from the latest checkpoint's
          name, or ``0`` if there is no checkpoint or the suffix is not a
          number.
        * ``checkpoints`` -- names of up to the five most recently modified
          checkpoints, oldest first.
        * ``latest_metrics`` -- parsed contents of the latest checkpoint's
          ``metrics.json``, or ``{}`` if it is missing or unreadable.
    """
    checkpoint_dir = Path(CHECKPOINT_DIR)
    if not checkpoint_dir.exists():
        return {"status": "not_started", "checkpoints": [], "latest_step": 0}

    # Training runs in a separate service, so an entry can disappear between
    # glob() and stat(). Skip such entries instead of failing the request.
    timestamped = []
    for entry in checkpoint_dir.glob("step_*"):
        try:
            timestamped.append((entry.stat().st_mtime, entry))
        except OSError:
            continue
    timestamped.sort(key=lambda pair: pair[0])
    checkpoints = [entry for _, entry in timestamped]

    latest_step = 0
    latest_metrics = {}
    if checkpoints:
        latest = checkpoints[-1]
        metrics_file = latest / "metrics.json"
        try:
            with open(metrics_file, encoding="utf-8") as f:
                latest_metrics = json.load(f)
        except FileNotFoundError:
            # The checkpoint simply has no metrics file (yet).
            pass
        except (OSError, ValueError) as exc:
            # ValueError covers both invalid JSON and undecodable bytes,
            # e.g. a metrics file that is still being written.
            logger.warning("Could not read metrics file %s: %s", metrics_file, exc)
            latest_metrics = {}

        try:
            latest_step = int(latest.name.replace("step_", ""))
        except ValueError:
            # The glob also matches names such as "step_final" or
            # "step_100.tmp"; report step 0 rather than erroring out.
            logger.warning("Checkpoint name %r has no numeric step", latest.name)

    return {
        "status": "running" if checkpoints else "not_started",
        "latest_step": latest_step,
        "checkpoints": [str(c.name) for c in checkpoints[-5:]],
        "latest_metrics": latest_metrics,
    }
async def training_metrics():
    """Return all training metrics from saved checkpoints.

    Loads every ``<subdir>/metrics.json`` found directly under
    ``CHECKPOINT_DIR``. Loading is best-effort: a file that cannot be read
    or parsed is skipped and a warning is logged, so one bad checkpoint
    does not hide the metrics of the others.

    Returns:
        dict: ``{"metrics": [...]}`` with one entry per successfully loaded
        metrics file, or an empty list if the checkpoint directory does not
        exist.
    """
    checkpoint_dir = Path(CHECKPOINT_DIR)
    if not checkpoint_dir.exists():
        return {"metrics": []}

    all_metrics = []
    # NOTE: files are ordered by path comparison, which is lexicographic, so
    # "step_10" sorts before "step_2" unless step numbers are zero-padded.
    for metrics_file in sorted(checkpoint_dir.glob("*/metrics.json")):
        try:
            with open(metrics_file, encoding="utf-8") as f:
                all_metrics.append(json.load(f))
        except Exception:
            # Deliberately broad to keep the endpoint best-effort, but leave
            # a trace so corrupt or unreadable files can be diagnosed.
            logger.warning(
                "Skipping unreadable metrics file %s", metrics_file, exc_info=True
            )
    return {"metrics": all_metrics}
async def episode_count():
    """Return count of collected self-play episodes.

    Counts the lines of every ``*.jsonl`` file directly under
    ``SELFPLAY_DATA_DIR`` (presumably one episode per line -- confirm
    against the writer in the training service).

    Returns:
        dict: ``{"count": <total line count>, "files": [...]}`` where
        ``files`` holds the names of the last five data files in sorted
        order. Returns a count of ``0`` and an empty list if the data
        directory does not exist.
    """
    data_dir = Path(SELFPLAY_DATA_DIR)
    if not data_dir.exists():
        return {"count": 0, "files": []}

    # glob() yields entries in arbitrary order; sort so that the "last five"
    # reported below is deterministic.
    files = sorted(data_dir.glob("*.jsonl"))

    total = 0
    for path in files:
        try:
            # Binary mode: only newlines are counted, so no text decoding is
            # needed and a stray non-UTF-8 byte cannot fail the request. The
            # context manager closes each handle promptly.
            with open(path, "rb") as f:
                total += sum(1 for _ in f)
        except OSError as exc:
            # The file may have been removed or be unreadable; keep counting
            # the remaining files.
            logger.warning("Could not read episode file %s: %s", path, exc)

    return {"count": total, "files": [p.name for p in files[-5:]]}