Spaces:
Paused
Paused
File size: 2,455 Bytes
bc35a94 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | from enum import Enum
from typing import Optional
from pydantic import BaseModel
from pydantic import Field
class DifficultyTier(str, Enum):
    """Closed set of difficulty labels.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (``DifficultyTier.easy == "easy"``).
    """

    easy = "easy"
    medium = "medium"
    hard = "hard"
class Action(BaseModel):
    """A single action: a required command string plus optional reasoning."""

    # Must contain at least one character; an empty string fails validation.
    command: str = Field(min_length=1)
    # Free-form text; may be omitted, in which case it is None.
    reasoning: str | None = None
class Observation(BaseModel):
    """Observation payload (embedded in ``StepResult.observation``).

    Holds output fields (``stdout``, ``stderr``, ``exit_code``), the working
    directory, timing, reward/termination flags and step counters, followed
    by optional grader progress signals that all have defaults.
    """

    stdout: str
    stderr: str
    exit_code: int
    working_directory: str
    # Validated as >= 0.0. The unit is not stated here (presumably seconds --
    # TODO confirm).
    execution_time: float = Field(ge=0.0)
    reward: float
    done: bool
    # Validated as >= 0.
    step_number: int = Field(ge=0)
    # Validated as > 0.
    max_steps: int = Field(gt=0)

    # Optional progress signals populated by the server-side reward engine.
    # Clients that care about shaped progress (training) read these; older
    # clients simply ignore them.
    #
    # NOTE(review): grader_health is unconstrained here, whereas
    # TaskScenarioState.health is limited to [0, 1] -- confirm whether the
    # same bound is intended.
    grader_health: float = 0.0
    # default_factory gives every instance its own empty dict.
    grader_details: dict[str, bool | float | str] = Field(default_factory=dict)
    # Defaults to an empty string; the exact meaning of this code is not
    # documented in this file.
    ood_http_code: str = ""
class EnvironmentState(BaseModel):
    """Episode-level state (embedded in ``StepResult.state``)."""

    # Both identifiers must be non-empty strings.
    episode_id: str = Field(min_length=1)
    task_id: str = Field(min_length=1)
    # Validated as >= 0.
    step_count: int = Field(ge=0)
    # Validated as > 0. No cross-field check ties step_count to max_steps.
    max_steps: int = Field(gt=0)
    done: bool
    reward: float
class ResetRequest(BaseModel):
    """Request body for a reset; the task id is optional."""

    # None when omitted. How the server picks a task in that case is not
    # defined in this file.
    task_id: str | None = None
class StepRequest(BaseModel):
    """Request body for a step: the action plus an optional episode id."""

    action: Action
    # Optional episode id so concurrent rollouts don't clobber each other's
    # session. Older clients that omit it fall back to the most recently
    # created episode on the server.
    episode_id: str | None = None
class StepResult(BaseModel):
    """Pairs an ``Observation`` with the accompanying ``EnvironmentState``."""

    observation: Observation
    state: EnvironmentState
class TaskMetadata(BaseModel):
    """Descriptive metadata and limits for a task."""

    # Must be a non-empty string.
    task_id: str = Field(min_length=1)
    difficulty: DifficultyTier
    description: str
    # Validated as > 0.
    max_steps: int = Field(gt=0)
    # Validated as > 0.0. The unit is not stated here (presumably seconds --
    # TODO confirm).
    time_limit: float = Field(gt=0.0)
    base_filesystem_path: str
class RewardSignal(BaseModel):
    """Reward components together with a total.

    Only the sign constraints below are validated; the model does not check
    that ``total_reward`` equals any combination of the other fields.
    """

    # May be positive or negative.
    health_delta: float
    # Validated as >= 0.0.
    knowledge_delta: float = Field(ge=0.0)
    # Validated as <= 0.0, i.e. a penalty is zero or negative.
    action_penalty: float = Field(le=0.0)
    total_reward: float
class DiagnosticTrigger(BaseModel):
    """A fact id, the command patterns tied to it, and a positive reward."""

    # Must be a non-empty string.
    fact_id: str = Field(min_length=1)
    # min_length applies to the list: at least one pattern is required.
    # NOTE(review): individual patterns are not length-checked, so an empty
    # string is accepted -- confirm that the matcher tolerates it.
    command_patterns: list[str] = Field(min_length=1)
    # Validated as strictly > 0.0.
    reward: float = Field(gt=0.0)
class TaskScenarioState(BaseModel):
    """Scenario status: a bounded health score, a done flag and details."""

    # Validated to lie within the closed interval [0.0, 1.0].
    health: float = Field(ge=0.0, le=1.0)
    done: bool
    # Required (no default); values may be bool, float or str.
    details: dict[str, bool | float | str]
class TaskScenarioDefinition(BaseModel):
    """Task metadata plus sandbox flags and diagnostic triggers."""

    metadata: TaskMetadata
    # Defaults to True.
    requires_network_isolation: bool = True
    # Defaults to False.
    allows_nested_sandbox: bool = False
    # default_factory gives every instance its own empty list.
    diagnostic_triggers: list[DiagnosticTrigger] = Field(default_factory=list)
|