# Spaces: Runtime error  (page status banner captured with this listing; not part of the module)
| from __future__ import annotations | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| from typing import Any | |
| import argparse | |
| import json | |
| import logging | |
| import sys | |
| from .config import load_app_config | |
| from .demo_packs import load_demo_pack | |
| from .logging_utils import setup_logging | |
| from .storage import SQLiteStore | |
| from .tracing import utc_now, write_trace_artifact | |
@dataclass
class EvalResult:
    """Outcome of a single eval run of one demo pack against one project.

    ``run_eval_for_project`` builds instances with keyword arguments and
    ``main`` serialises them through ``__dict__``, so this has to be a
    regular (non-slotted) dataclass: the decorator supplies ``__init__`` and
    keeps the per-instance ``__dict__`` that the JSON output relies on.
    """

    project: str  # key of the project spec that was evaluated
    pack_id: str  # identifier of the demo pack that was run
    passed: bool  # True when ``findings`` is empty
    findings: list[str]  # one message per expected signal that was missing or different
    result: dict[str, Any]  # raw mapping returned by the pack run
    trace_path: str  # stringified location of the written trace artifact
| def _expected_subset(expected: dict[str, Any], actual: dict[str, Any]) -> list[str]: | |
| issues = [] | |
| for key, value in expected.items(): | |
| if key not in actual: | |
| issues.append(f'missing key: {key}') | |
| elif actual[key] != value: | |
| issues.append(f'{key}: expected {value!r}, got {actual[key]!r}') | |
| return issues | |
def run_eval_for_project(project_module: str, pack_path: str | Path, db_path: str | Path | None = None) -> EvalResult:
    """Run one demo pack against a project and record the outcome.

    The project module is imported by name, its ``create_project_spec()``
    is called, and the resulting spec runs the pack against a
    ``SQLiteStore``. The run result is compared with the pack's
    ``expected_signals`` and a trace artifact describing the run is written.

    Args:
        project_module: Dotted module path exposing ``create_project_spec()``.
        pack_path: Location passed to ``load_demo_pack``.
        db_path: Optional SQLite path that replaces the one from the loaded
            config for this run.

    Returns:
        An ``EvalResult``; ``passed`` is true when no findings were produced.

    Raises:
        Any exception from importing the module, loading the pack/config,
        running the pack, or writing the trace propagates unchanged. The
        store is closed before an exception raised after it was opened
        leaves this function.
    """
    # Function-scope import: importlib.import_module is the documented way to
    # import a module by name and returns the leaf module directly.
    import importlib

    mod = importlib.import_module(project_module)
    spec = mod.create_project_spec()
    pack = load_demo_pack(pack_path)
    config = load_app_config(project_key=spec.key, data_subdir=spec.data_subdir)

    if db_path is not None:
        # Build a new config of the same class that differs only in sqlite_path.
        # NOTE(review): fields are copied one by one, so a field added to the
        # config class later has to be added here too — confirm against the
        # config definition.
        config = config.__class__(
            project_key=config.project_key,
            app_mode=config.app_mode,
            root_dir=config.root_dir,
            data_dir=config.data_dir,
            sqlite_path=Path(db_path),
            artifact_dir=config.artifact_dir,
            cache_dir=config.cache_dir,
            model_registry_path=config.model_registry_path,
        )

    started_at = utc_now()
    store = SQLiteStore(config.sqlite_path, config.artifact_dir)
    try:
        result = spec.run_pack(pack, store, config)
        expected = pack.expected_signals
        findings = _expected_subset(expected, result)
        passed = not findings
        finished_at = utc_now()
        trace_path = write_trace_artifact(
            config.artifact_dir,
            {
                'kind': 'eval',
                'project': spec.key,
                'pack_id': pack.pack_id,
                'pack_path': str(pack_path),
                'started_at': started_at,
                'finished_at': finished_at,
                'passed': passed,
                'findings': findings,
                'result': result,
            },
        )
    finally:
        # Release the store whether or not the run or the trace write succeeded.
        store.close()

    return EvalResult(
        project=spec.key,
        pack_id=pack.pack_id,
        passed=passed,
        findings=findings,
        result=result,
        trace_path=str(trace_path),
    )
| def main() -> int: | |
| parser = argparse.ArgumentParser(description='Run golden-scenario evals for the ALL4 kit') | |
| parser.add_argument('project_module', help='Python module path, e.g. apps.p1_elder_paperwork.app') | |
| parser.add_argument('pack_path', help='Path to a demo pack folder') | |
| parser.add_argument('--db-path', help='Optional SQLite path for the run') | |
| parser.add_argument( | |
| '--json-only', | |
| action='store_true', | |
| help='Emit exactly one JSON object to stdout (no logging, no pretty-print).', | |
| ) | |
| parser.add_argument( | |
| '--quiet', | |
| '--no-log', | |
| dest='quiet', | |
| action='store_true', | |
| help='Disable JSONL logging (useful when piping stdout).', | |
| ) | |
| parser.add_argument( | |
| '--log-level', | |
| choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], | |
| default='INFO', | |
| help='Logging threshold for the JSONL status line.', | |
| ) | |
| args = parser.parse_args() | |
| logger = None | |
| if not args.quiet and not args.json_only: | |
| logger = setup_logging('app_kit.eval_runner', level=getattr(logging, args.log_level), stream=sys.stderr) | |
| result = run_eval_for_project(args.project_module, args.pack_path, args.db_path) | |
| if logger is not None: | |
| logger.info('eval completed: %s', json.dumps(result.__dict__, ensure_ascii=False)) | |
| if args.json_only: | |
| print(json.dumps(result.__dict__, ensure_ascii=False)) | |
| else: | |
| print(json.dumps(result.__dict__, indent=2, ensure_ascii=False)) | |
| return 0 if result.passed else 1 | |
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())