Spaces:

ccyloopss
/

HPCOpenenv

Paused

App Files Files Community

HPCOpenenv / sysadmin_env /models.py

huggingmenfordays

deploy: ccyloopss/HPCOpenenv — with OPENENV_API_KEY auth guard

bc35a94 18 days ago

raw

history blame contribute delete

2.46 kB

	from enum import Enum
	from typing import Optional

	from pydantic import BaseModel
	from pydantic import Field


	class DifficultyTier(str, Enum):
	    # closed set of difficulty labels for a task. mixing in str makes each
	    # member compare equal to its plain string value, so "easy" and
	    # DifficultyTier.easy are interchangeable in comparisons.
	    easy = "easy"
	    medium = "medium"
	    hard = "hard"


	class Action(BaseModel):
	    # one action carried by a step request: a command string plus optional
	    # free-text reasoning.
	    # command must be non-empty; an empty string fails validation.
	    command: str = Field(min_length=1)
	    # optional; defaults to None when the caller does not supply it.
	    reasoning: Optional[str] = None


	class Observation(BaseModel):
	    # what is reported back after a step. the field names suggest captured
	    # command output (presumably from running the action's command; confirm
	    # against the server code, which is not visible in this file).
	    stdout: str
	    stderr: str
	    exit_code: int
	    working_directory: str
	    # validated to be non-negative; the unit is not stated in this file.
	    execution_time: float = Field(ge=0.0)
	    reward: float
	    done: bool
	    # step_number may be 0; max_steps must be strictly positive.
	    step_number: int = Field(ge=0)
	    max_steps: int = Field(gt=0)
	    # optional progress signals populated by the server-side reward engine.
	    # clients that care about shaped progress (training) read these. older
	    # clients simply ignore them.
	    grader_health: float = 0.0
	    # default_factory gives every instance its own empty dict.
	    grader_details: dict[str, bool | float | str] = Field(default_factory=dict)
	    # NOTE(review): the meaning of this code is not visible in this file;
	    # it defaults to an empty string.
	    ood_http_code: str = ""


	class EnvironmentState(BaseModel):
	    # snapshot of one episode: which episode and task it is, how many steps
	    # have been counted against the step budget, and the done/reward values.
	    # both identifiers must be non-empty strings.
	    episode_id: str = Field(min_length=1)
	    task_id: str = Field(min_length=1)
	    # step_count may be 0; max_steps must be strictly positive.
	    step_count: int = Field(ge=0)
	    max_steps: int = Field(gt=0)
	    done: bool
	    # NOTE(review): whether this is the last-step or cumulative reward is
	    # not visible in this file; confirm against the server.
	    reward: float


	class ResetRequest(BaseModel):
	    # payload of a reset request.
	    # task_id is optional and defaults to None. how a task is chosen when it
	    # is omitted is decided elsewhere and is not visible in this file.
	    task_id: Optional[str] = None


	class StepRequest(BaseModel):
	    # payload of a step request: the action to apply, plus an optional
	    # episode selector.
	    action: Action
	    # optional episode id so concurrent rollouts don't clobber each other's
	    # session. older clients that omit it fall back to the most recently
	    # created episode on the server.
	    episode_id: Optional[str] = None


	class StepResult(BaseModel):
	    # pairs the observation of a step with the environment state; both
	    # fields are required.
	    observation: Observation
	    state: EnvironmentState


	class TaskMetadata(BaseModel):
	    # descriptive data about a task: identity, difficulty tier, budgets, and
	    # the path of its base filesystem.
	    # task_id must be a non-empty string.
	    task_id: str = Field(min_length=1)
	    difficulty: DifficultyTier
	    description: str
	    # both budgets must be strictly positive. the unit of time_limit is not
	    # stated in this file.
	    max_steps: int = Field(gt=0)
	    time_limit: float = Field(gt=0.0)
	    # plain string; no path validation is applied at the model level.
	    base_filesystem_path: str


	class RewardSignal(BaseModel):
	    # breakdown of a reward into named components plus a total. the model
	    # only enforces the sign constraints below; it does not check that
	    # total_reward equals any combination of the other fields.
	    # health_delta is unconstrained (may be negative).
	    health_delta: float
	    # knowledge_delta can never be negative.
	    knowledge_delta: float = Field(ge=0.0)
	    # action_penalty can never be positive.
	    action_penalty: float = Field(le=0.0)
	    total_reward: float


	class DiagnosticTrigger(BaseModel):
	    # associates a fact id with a list of command patterns and a reward.
	    # fact_id must be a non-empty string.
	    fact_id: str = Field(min_length=1)
	    # at least one pattern is required. NOTE(review): the pattern syntax
	    # (regex, glob, substring) is not specified in this file; confirm
	    # against the code that matches commands.
	    command_patterns: list[str] = Field(min_length=1)
	    # must be strictly positive.
	    reward: float = Field(gt=0.0)


	class TaskScenarioState(BaseModel):
	    # state reported for a task scenario: a bounded health score, a done
	    # flag, and a details mapping.
	    # health is validated to lie in the closed range [0.0, 1.0].
	    health: float = Field(ge=0.0, le=1.0)
	    done: bool
	    # required (no default); values may be bool, float, or str.
	    details: dict[str, bool | float | str]


	class TaskScenarioDefinition(BaseModel):
	    # static definition of a task scenario: its metadata, two sandbox
	    # flags, and the diagnostic triggers attached to it.
	    metadata: TaskMetadata
	    # network isolation is required unless explicitly turned off.
	    requires_network_isolation: bool = True
	    # nested sandboxing is disallowed unless explicitly turned on.
	    allows_nested_sandbox: bool = False
	    # defaults to a fresh empty list per instance.
	    diagnostic_triggers: list[DiagnosticTrigger] = Field(default_factory=list)