# HPCOpenenv / sysadmin_env/models.py
# huggingmenfordays's picture
# deploy: ccyloopss/HPCOpenenv — with OPENENV_API_KEY auth guard
# bc35a94
from enum import Enum
from typing import Optional
from pydantic import BaseModel
from pydantic import Field
class DifficultyTier(str, Enum):
    """Closed set of task difficulty levels.

    Mixing in ``str`` makes every member a real string equal to its value,
    so members compare equal to, and can be looked up from, the plain
    strings ``"easy"``, ``"medium"`` and ``"hard"``.
    """

    easy = "easy"
    medium = "medium"
    hard = "hard"
class Action(BaseModel):
    """One action, as carried in the ``action`` field of a ``StepRequest``."""

    # the command text; validation rejects the empty string.
    command: str = Field(min_length=1)
    # optional free-form text accompanying the command; defaults to None.
    reasoning: str | None = None
class Observation(BaseModel):
    """Per-step observation, returned as ``StepResult.observation``.

    NOTE(review): the field names suggest this holds the captured output of
    the submitted command plus episode bookkeeping -- confirm against the
    server code that builds it.
    """

    stdout: str
    stderr: str
    exit_code: int
    working_directory: str
    # validated as non-negative; the unit is not stated in this file
    # (presumably seconds -- TODO confirm).
    execution_time: float = Field(ge=0.0)
    reward: float
    done: bool
    # step_number may be 0, max_steps must be strictly positive.
    step_number: int = Field(ge=0)
    max_steps: int = Field(gt=0)
    # optional progress signals populated by the server-side reward engine.
    # clients that care about shaped progress (training) read these. older
    # clients simply ignore them.
    grader_health: float = 0.0
    # default_factory builds a fresh empty dict for every instance.
    grader_details: dict[str, bool | float | str] = Field(default_factory=dict)
    ood_http_code: str = ""
class EnvironmentState(BaseModel):
    """Episode-level state, returned as ``StepResult.state``."""

    # both identifiers must be non-empty strings.
    episode_id: str = Field(min_length=1)
    task_id: str = Field(min_length=1)
    # step_count may be 0, max_steps must be strictly positive.
    step_count: int = Field(ge=0)
    max_steps: int = Field(gt=0)
    done: bool
    reward: float
class ResetRequest(BaseModel):
    """Payload of a reset request."""

    # optional task to select; defaults to None. how the server chooses a
    # task when this is omitted is not visible in this file.
    task_id: str | None = None
class StepRequest(BaseModel):
    """Payload of a step request: the action plus an optional episode id."""

    action: Action
    # optional episode id so concurrent rollouts don't clobber each other's
    # session. older clients that omit it fall back to the most recently
    # created episode on the server.
    episode_id: str | None = None
class StepResult(BaseModel):
    """Pairs the ``Observation`` of a step with the ``EnvironmentState``."""

    observation: Observation
    state: EnvironmentState
class TaskMetadata(BaseModel):
    """Static description of a task: id, difficulty, limits and base path."""

    # must be a non-empty string.
    task_id: str = Field(min_length=1)
    difficulty: DifficultyTier
    description: str
    # both limits must be strictly positive. the unit of time_limit is not
    # stated in this file (presumably seconds -- TODO confirm).
    max_steps: int = Field(gt=0)
    time_limit: float = Field(gt=0.0)
    base_filesystem_path: str
class RewardSignal(BaseModel):
    """Reward components together with a total.

    Only the sign constraints below are validated. NOTE(review): this model
    does not check that ``total_reward`` equals any combination of the other
    fields -- confirm how the producer computes it.
    """

    # unconstrained: may be negative, zero or positive.
    health_delta: float
    # must be >= 0.
    knowledge_delta: float = Field(ge=0.0)
    # must be <= 0, i.e. a penalty can never add reward.
    action_penalty: float = Field(le=0.0)
    total_reward: float
class DiagnosticTrigger(BaseModel):
    """Associates a fact id with command patterns and a positive reward.

    NOTE(review): presumably the reward is granted when a submitted command
    matches one of the patterns; the matching logic (and whether patterns
    are regexes or globs) is not in this file -- confirm against the caller.
    """

    # must be a non-empty string.
    fact_id: str = Field(min_length=1)
    # the list must contain at least one pattern.
    command_patterns: list[str] = Field(min_length=1)
    # must be strictly positive.
    reward: float = Field(gt=0.0)
class TaskScenarioState(BaseModel):
    """Snapshot of a task scenario: health score, done flag and details."""

    # validated to lie in the closed interval [0.0, 1.0].
    health: float = Field(ge=0.0, le=1.0)
    done: bool
    # required mapping of string keys to bool, float or str values.
    details: dict[str, bool | float | str]
class TaskScenarioDefinition(BaseModel):
    """Task metadata plus sandbox flags and diagnostic triggers."""

    metadata: TaskMetadata
    # sandbox flags; network isolation defaults to on, nested sandboxing
    # defaults to off. how they are enforced is not visible in this file.
    requires_network_isolation: bool = True
    allows_nested_sandbox: bool = False
    # default_factory builds a fresh empty list for every instance.
    diagnostic_triggers: list[DiagnosticTrigger] = Field(default_factory=list)