# Labexperiment / models.py
# Sbhimraj's picture
# Add application file
# aab0192
# Raw
# History Blame Contribute Delete
# 4.73 kB
"""
models.py -- Pydantic data models for the Scientific Hypothesis Lab.
Follows the OpenEnv spec: Action, Observation, and State base types from
openenv.core.env_server.types.
"""
from __future__ import annotations
from enum import Enum
from typing import Any, Optional
from pydantic import Field
try:
    from openenv.core.env_server.types import Action, Observation, State
except ImportError:
    # OpenEnv is not importable: define minimal local stand-ins with the same
    # names so the models below can still be imported and subclassed.
    # NOTE(review): these are assumed to approximate the real OpenEnv base
    # types -- confirm field parity against the installed openenv version.
    from pydantic import BaseModel

    class Action(BaseModel):  # type: ignore[no-redef]
        # Fallback base for agent -> environment messages.
        # extra="forbid": unknown fields are rejected at validation time.
        model_config = {"extra": "forbid"}
        metadata: dict[str, Any] = Field(default_factory=dict)

    class Observation(BaseModel):  # type: ignore[no-redef]
        # Fallback base for environment -> agent messages.
        # extra="forbid": unknown fields are rejected at validation time.
        model_config = {"extra": "forbid"}
        done: bool = False
        reward: float | None = None
        metadata: dict[str, Any] = Field(default_factory=dict)

    class State(BaseModel):  # type: ignore[no-redef]
        # Fallback base for episode state.
        # extra="allow": undeclared fields are kept rather than rejected.
        model_config = {"extra": "allow"}
        # Same union spelling as Observation.reward, for consistency.
        episode_id: str | None = None
        step_count: int = 0
class ExperimentType(str, Enum):
    """Kinds of experiment an agent can request.

    Members are also plain ``str`` values, so they compare equal to their
    string form (e.g. ``ExperimentType.PASSIVE == "passive"``).
    """

    INTERVENTION = "intervention"
    CORRELATION = "correlation"
    COUNTERFACTUAL = "counterfactual"
    PASSIVE = "passive"
class ActionType(str, Enum):
    """Top-level form of an agent message: run an experiment or submit.

    Members are also plain ``str`` values, so they compare equal to their
    string form (e.g. ``ActionType.SUBMIT == "submit"``).
    """

    EXPERIMENT = "experiment"
    SUBMIT = "submit"
class NoiseLevelTag(str, Enum):
    """Coarse label for an episode's observation-noise level.

    Members are also plain ``str`` values, so they compare equal to their
    string form (e.g. ``NoiseLevelTag.LOW == "low"``).
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
class HypLabAction(Action):
    """
    Every message the agent sends to the environment.
    Two forms:
    - action_type=EXPERIMENT: run an experiment, burn one budget step
    - action_type=SUBMIT: commit to a hypothesis, end the episode
    """

    # Required discriminator. Every other field below is optional and
    # defaults to None.
    # NOTE(review): this model does not check that the fields matching the
    # chosen action_type are actually supplied -- presumably the environment
    # validates that when handling the action; confirm.
    action_type: ActionType = Field(
        ...,
        description="Whether the agent is running an experiment or submitting.",
    )

    # --- Experiment parameters (presumably read only for EXPERIMENT) ---
    experiment_type: Optional[ExperimentType] = Field(
        None, description="Which kind of experiment to run."
    )
    target_variable: Optional[str] = Field(
        None, description="The variable the agent wants to observe."
    )
    control_variable: Optional[str] = Field(
        None, description="The variable the agent is setting or varying."
    )
    control_value: Optional[float] = Field(
        None,
        description=(
            "INTERVENTION: exact value to set. "
            "COUNTERFACTUAL: the proposed delta. "
            "Unused for PASSIVE."
        ),
    )
    # Described as a [min, max, n_points] triple; the length is not enforced
    # here and n_points is typed as float like the other elements.
    control_range: Optional[list[float]] = Field(
        None,
        description="CORRELATION only: [min, max, n_points].",
    )

    # --- Hypothesis submission (presumably read only for SUBMIT) ---
    hypothesis_text: Optional[str] = Field(
        None,
        description="Free-text statement of discovered rules.",
    )
    hypothesis_equations: Optional[list[str]] = Field(
        None,
        description="Structured list of equations, one per rule.",
    )
    # Validation rejects values outside the closed interval [0, 1].
    confidence: Optional[float] = Field(
        None, ge=0.0, le=1.0,
        description="Agent's self-reported confidence [0,1].",
    )
class HypLabObservation(Observation):
    """
    Everything the environment hands back after reset() or step().
    Inherits `done`, `reward`, `metadata` from Observation base.
    """

    # --- Always-present context ---
    # system_message is the only required field on this model.
    system_message: str = Field(
        ..., description="Human-readable description of what just happened."
    )
    available_variables: list[str] = Field(
        default_factory=list,
        description="Names of all variables in the current hidden world.",
    )
    budget_remaining: int = Field(
        0, description="Steps left before forced termination."
    )

    # --- Experiment result ---
    # Going by the names, these echo the experiment that was run; all default
    # to None. NOTE(review): confirm which are filled per experiment type.
    experiment_type_run: Optional[ExperimentType] = None
    control_variable_used: Optional[str] = None
    control_value_used: Optional[Any] = None
    target_variable_observed: Optional[str] = None
    result_value: Optional[Any] = Field(
        None,
        description="Noisy observed value(s). Float or list of (x,y) pairs.",
    )
    noise_sigma: Optional[float] = None
    is_redundant: bool = False
    info_gain_reward: float = 0.0

    # --- Scoring breakdown ---
    # All default to None. NOTE(review): presumably populated only once a
    # hypothesis has been submitted -- confirm against the environment.
    # accuracy_score is the only one with an enforced range ([0, 1]).
    accuracy_score: Optional[float] = Field(None, ge=0, le=1)
    precision_bonus: Optional[float] = None
    calibration_score: Optional[float] = None
    efficiency_bonus: Optional[float] = None
    contradiction_penalty: Optional[float] = None
    total_episode_reward: Optional[float] = None
    ground_truth_revealed: Optional[str] = None
class HypLabState(State):
    """
    Snapshot of episode metadata. Never leaks the hidden causal graph.
    Inherits `episode_id`, `step_count` from State base.
    """

    # Every field has a default, so the model can be built with no arguments.

    # --- Budget ---
    budget_total: int = 0
    budget_remaining: int = 0

    # --- Noise configuration ---
    # The tag and sigma are independent fields; nothing here keeps them in
    # sync. NOTE(review): confirm the environment sets both together.
    noise_level: NoiseLevelTag = NoiseLevelTag.MEDIUM
    noise_sigma: float = 0.20

    # --- World description ---
    domain: str = "unknown"
    n_variables: int = 0

    # --- Running tallies ---
    # default_factory gives each instance its own list. The shape of the
    # dict entries is not constrained by this model.
    experiment_history: list[dict] = Field(default_factory=list)
    cumulative_info_gain: float = 0.0
    redundant_experiment_count: int = 0