""" models.py -- Pydantic data models for the Scientific Hypothesis Lab. Follows the OpenEnv spec: Action, Observation, and State base types from openenv.core.env_server.types. """ from __future__ import annotations from enum import Enum from typing import Any, Optional from pydantic import Field try: from openenv.core.env_server.types import Action, Observation, State except ImportError: from pydantic import BaseModel class Action(BaseModel): # type: ignore[no-redef] model_config = {"extra": "forbid"} metadata: dict[str, Any] = Field(default_factory=dict) class Observation(BaseModel): # type: ignore[no-redef] model_config = {"extra": "forbid"} done: bool = False reward: float | None = None metadata: dict[str, Any] = Field(default_factory=dict) class State(BaseModel): # type: ignore[no-redef] model_config = {"extra": "allow"} episode_id: Optional[str] = None step_count: int = 0 class ExperimentType(str, Enum): INTERVENTION = "intervention" CORRELATION = "correlation" COUNTERFACTUAL = "counterfactual" PASSIVE = "passive" class ActionType(str, Enum): EXPERIMENT = "experiment" SUBMIT = "submit" class NoiseLevelTag(str, Enum): LOW = "low" MEDIUM = "medium" HIGH = "high" class HypLabAction(Action): """ Every message the agent sends to the environment. Two forms: - action_type=EXPERIMENT: run an experiment, burn one budget step - action_type=SUBMIT: commit to a hypothesis, end the episode """ action_type: ActionType = Field( ..., description="Whether the agent is running an experiment or submitting.", ) experiment_type: Optional[ExperimentType] = Field( None, description="Which kind of experiment to run." ) target_variable: Optional[str] = Field( None, description="The variable the agent wants to observe." ) control_variable: Optional[str] = Field( None, description="The variable the agent is setting or varying." ) control_value: Optional[float] = Field( None, description=( "INTERVENTION: exact value to set. " "COUNTERFACTUAL: the proposed delta. " "Unused for PASSIVE." ), ) control_range: Optional[list[float]] = Field( None, description="CORRELATION only: [min, max, n_points].", ) hypothesis_text: Optional[str] = Field( None, description="Free-text statement of discovered rules.", ) hypothesis_equations: Optional[list[str]] = Field( None, description="Structured list of equations, one per rule.", ) confidence: Optional[float] = Field( None, ge=0.0, le=1.0, description="Agent's self-reported confidence [0,1].", ) class HypLabObservation(Observation): """ Everything the environment hands back after reset() or step(). Inherits `done`, `reward`, `metadata` from Observation base. """ system_message: str = Field( ..., description="Human-readable description of what just happened." ) available_variables: list[str] = Field( default_factory=list, description="Names of all variables in the current hidden world.", ) budget_remaining: int = Field( 0, description="Steps left before forced termination." ) experiment_type_run: Optional[ExperimentType] = None control_variable_used: Optional[str] = None control_value_used: Optional[Any] = None target_variable_observed: Optional[str] = None result_value: Optional[Any] = Field( None, description="Noisy observed value(s). Float or list of (x,y) pairs.", ) noise_sigma: Optional[float] = None is_redundant: bool = False info_gain_reward: float = 0.0 accuracy_score: Optional[float] = Field(None, ge=0, le=1) precision_bonus: Optional[float] = None calibration_score: Optional[float] = None efficiency_bonus: Optional[float] = None contradiction_penalty: Optional[float] = None total_episode_reward: Optional[float] = None ground_truth_revealed: Optional[str] = None class HypLabState(State): """ Snapshot of episode metadata. Never leaks the hidden causal graph. Inherits `episode_id`, `step_count` from State base. """ budget_total: int = 0 budget_remaining: int = 0 noise_level: NoiseLevelTag = NoiseLevelTag.MEDIUM noise_sigma: float = 0.20 domain: str = "unknown" n_variables: int = 0 experiment_history: list[dict] = Field(default_factory=list) cumulative_info_gain: float = 0.0 redundant_experiment_count: int = 0