# Source: Hugging Face Spaces -- Sumukh66/Labexperiment (Space status: Sleeping)
# File size: 4,730 Bytes; revision aab0192

"""
models.py -- Pydantic data models for the Scientific Hypothesis Lab.

Follows the OpenEnv spec: Action, Observation, and State base types from
openenv.core.env_server.types.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Optional

from pydantic import Field

# Prefer the real OpenEnv base types. If the openenv package cannot be
# imported, define minimal local stand-ins with the same names so this module
# (and the models below that subclass them) can still be imported.
try:
    from openenv.core.env_server.types import Action, Observation, State
except ImportError:
    from pydantic import BaseModel

    # Stand-in for openenv's Action: rejects unknown fields.
    class Action(BaseModel):  # type: ignore[no-redef]
        model_config = {"extra": "forbid"}
        metadata: dict[str, Any] = Field(default_factory=dict)

    # Stand-in for openenv's Observation: rejects unknown fields.
    class Observation(BaseModel):  # type: ignore[no-redef]
        model_config = {"extra": "forbid"}
        done: bool = False
        # Spelled Optional[...] like every other optional field in this file.
        # Because of `from __future__ import annotations`, Pydantic evaluates
        # the annotation string at class-creation time, and the `float | None`
        # form cannot be evaluated on Python < 3.10.
        reward: Optional[float] = None
        metadata: dict[str, Any] = Field(default_factory=dict)

    # Stand-in for openenv's State: unlike the two above, extra fields are
    # allowed.
    class State(BaseModel):  # type: ignore[no-redef]
        model_config = {"extra": "allow"}
        episode_id: Optional[str] = None
        step_count: int = 0


class ExperimentType(str, Enum):
    """
    Kinds of experiment an agent can request.

    Members are also `str` instances (str mixin) with lowercase string values.
    The per-member notes below restate the field descriptions on HypLabAction.
    """

    # HypLabAction.control_value is the exact value to set.
    INTERVENTION = "intervention"
    # HypLabAction.control_range ([min, max, n_points]) applies to this type only.
    CORRELATION = "correlation"
    # HypLabAction.control_value is the proposed delta.
    COUNTERFACTUAL = "counterfactual"
    # HypLabAction.control_value is unused.
    PASSIVE = "passive"


class ActionType(str, Enum):
    """
    The two forms of agent message, used as the type of HypLabAction.action_type.

    Members are also `str` instances (str mixin) with lowercase string values.
    """

    # Run an experiment; per the HypLabAction docstring this burns one budget step.
    EXPERIMENT = "experiment"
    # Commit to a hypothesis; per the HypLabAction docstring this ends the episode.
    SUBMIT = "submit"


class NoiseLevelTag(str, Enum):
    """
    Coarse noise-level label, used as the type of HypLabState.noise_level.

    Members are also `str` instances (str mixin) with lowercase string values.
    """

    # NOTE(review): the numeric sigma that each tag corresponds to is not
    # defined by this enum -- confirm against the environment code.
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


class HypLabAction(Action):
    """
    A single message from the agent to the environment.

    `action_type` selects one of two forms:
      - EXPERIMENT: run an experiment, burn one budget step
      - SUBMIT: commit to a hypothesis, end the episode

    Every field other than `action_type` is optional and defaults to None.
    This model declares no cross-field validation, so it does not itself
    enforce which optional fields accompany which form.
    """

    # Discriminator between the two forms; the only required field.
    action_type: ActionType = Field(
        default=...,
        description="Whether the agent is running an experiment or submitting.",
    )

    # --- Experiment request fields ---
    experiment_type: Optional[ExperimentType] = Field(
        default=None,
        description="Which kind of experiment to run.",
    )
    target_variable: Optional[str] = Field(
        default=None,
        description="The variable the agent wants to observe.",
    )
    control_variable: Optional[str] = Field(
        default=None,
        description="The variable the agent is setting or varying.",
    )
    # Meaning depends on experiment_type; see the description text.
    control_value: Optional[float] = Field(
        default=None,
        description=(
            "INTERVENTION: exact value to set. COUNTERFACTUAL: "
            "the proposed delta. Unused for PASSIVE."
        ),
    )
    # Described as a 3-element list, but the length is not validated here.
    control_range: Optional[list[float]] = Field(
        default=None,
        description="CORRELATION only: [min, max, n_points].",
    )

    # --- Hypothesis submission fields ---
    hypothesis_text: Optional[str] = Field(
        default=None,
        description="Free-text statement of discovered rules.",
    )
    hypothesis_equations: Optional[list[str]] = Field(
        default=None,
        description="Structured list of equations, one per rule.",
    )
    # Bounded to the closed interval [0.0, 1.0] when provided.
    confidence: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Agent's self-reported confidence [0,1].",
    )


class HypLabObservation(Observation):
    """
    Payload the environment returns after reset() or step().

    `done`, `reward` and `metadata` are inherited from the Observation base.
    Only `system_message` is required; every other field has a default.
    """

    # --- Always-present context ---
    system_message: str = Field(
        default=...,
        description="Human-readable description of what just happened.",
    )
    available_variables: list[str] = Field(
        description="Names of all variables in the current hidden world.",
        default_factory=list,
    )
    budget_remaining: int = Field(
        default=0,
        description="Steps left before forced termination.",
    )

    # --- Details of the experiment that was run ---
    # NOTE(review): presumably only populated on experiment steps; nothing in
    # this model enforces that -- confirm against the environment code.
    experiment_type_run: Optional[ExperimentType] = None
    control_variable_used: Optional[str] = None
    control_value_used: Optional[Any] = None
    target_variable_observed: Optional[str] = None
    result_value: Optional[Any] = Field(
        default=None,
        description="Noisy observed value(s). Float or list of (x,y) pairs.",
    )
    noise_sigma: Optional[float] = None
    is_redundant: bool = False
    info_gain_reward: float = 0.0

    # --- Scoring breakdown ---
    # NOTE(review): presumably only filled in once the agent submits --
    # confirm against the environment code.
    # accuracy_score is bounded to [0, 1] when provided; the rest are unbounded.
    accuracy_score: Optional[float] = Field(default=None, ge=0, le=1)
    precision_bonus: Optional[float] = None
    calibration_score: Optional[float] = None
    efficiency_bonus: Optional[float] = None
    contradiction_penalty: Optional[float] = None
    total_episode_reward: Optional[float] = None
    ground_truth_revealed: Optional[str] = None


class HypLabState(State):
    """
    Snapshot of episode metadata. Never leaks the hidden causal graph.
    Inherits `episode_id`, `step_count` from State base.

    Every field declared here has a default, so the model can be constructed
    with no arguments.
    """

    # Budget bookkeeping; both default to 0.
    budget_total: int = 0
    budget_remaining: int = 0
    # Noise settings: a coarse tag plus a numeric sigma.
    # NOTE(review): the two defaults (MEDIUM, 0.20) are independent fields here;
    # whether they are kept in sync is decided outside this model -- confirm.
    noise_level: NoiseLevelTag = NoiseLevelTag.MEDIUM
    noise_sigma: float = 0.20
    domain: str = "unknown"
    n_variables: int = 0
    # One dict per recorded experiment; the dict schema is not constrained here.
    # default_factory gives each instance its own fresh list.
    experiment_history: list[dict] = Field(default_factory=list)
    # Running totals; both start at zero.
    cumulative_info_gain: float = 0.0
    redundant_experiment_count: int = 0