| from typing import Any, Optional | |
| from pydantic import BaseModel | |
class Dataset(BaseModel):
    # Pydantic model describing a dataset. All four fields are required:
    # none of them declares a default value.
    # Documented with comments rather than a docstring so that the JSON
    # schema pydantic generates for this model stays unchanged.
    name: str
    repo: str  # presumably a repository identifier; no format is enforced here
    num_tasks: int  # presumably the dataset's task count -- confirm with producers
    url: str  # plain str, so this model performs no URL validation
class Harness(BaseModel):
    # Pydantic model describing a harness. All four fields are required:
    # none of them declares a default value.
    # Documented with comments rather than a docstring so that the JSON
    # schema pydantic generates for this model stays unchanged.
    name: str
    skills: list[str]  # must be supplied, but an empty list is accepted
    is_oss: bool  # presumably "is open-source software" -- confirm
    url: str  # plain str, so this model performs no URL validation
class Model(BaseModel):
    # Pydantic model describing a model under evaluation. Every field is
    # required except `repo`, which defaults to None.
    # Documented with comments rather than a docstring so that the JSON
    # schema pydantic generates for this model stays unchanged.
    name: str
    # Spelled Optional[...] to match every other nullable field in this
    # file; pydantic treats it the same as `str | None`.
    repo: Optional[str] = None
    is_oss: bool  # presumably "is open-source software" -- confirm
    num_params: int  # presumably a raw parameter count -- unit not shown here
    precision: str  # free-form string; allowed values are not constrained here
    url: str  # plain str, so this model performs no URL validation
class Environment(BaseModel):
    # Pydantic model describing an execution environment. `name` and `url`
    # are required; `config` may be omitted.
    # Documented with comments rather than a docstring so that the JSON
    # schema pydantic generates for this model stays unchanged.
    name: str
    # Optional mapping with string keys and values of any type; defaults to
    # None when not supplied.
    config: Optional[dict[str, Any]] = None
    url: str  # plain str, so this model performs no URL validation
class Metrics(BaseModel):
    # Pydantic model holding the measurements of a run. Only `score` is
    # required; every other field is optional and defaults to None.
    # Documented with comments rather than a docstring so that the JSON
    # schema pydantic generates for this model stays unchanged.
    score: float

    # Optional counts.
    n_tasks: Optional[int] = None
    n_errors: Optional[int] = None

    # Optional token counts.
    # presumably n_total_tokens is the sum of the others -- not enforced here
    n_input_tokens: Optional[int] = None
    n_cache_tokens: Optional[int] = None
    n_output_tokens: Optional[int] = None
    n_total_tokens: Optional[int] = None

    # Optional time and cost.
    time_seconds: Optional[int] = None
    cost_usd: Optional[float] = None

    # Optional per-task means.
    # NOTE(review): apart from the cost, these means (and time_seconds above)
    # are typed int, not float -- confirm that producers round before
    # populating them.
    mean_input_tokens_per_task: Optional[int] = None
    mean_cache_tokens_per_task: Optional[int] = None
    mean_output_tokens_per_task: Optional[int] = None
    mean_tokens_per_task: Optional[int] = None
    mean_cost_usd_per_task: Optional[float] = None
    mean_time_seconds_per_task: Optional[int] = None
class Result(BaseModel):
    # Top-level pydantic record that bundles one instance each of Dataset,
    # Harness, Model, Environment and Metrics. All five fields are required:
    # none of them declares a default value.
    # Documented with comments rather than a docstring so that the JSON
    # schema pydantic generates for this model stays unchanged.
    dataset: Dataset
    harness: Harness
    model: Model
    environment: Environment
    metrics: Metrics