File size: 4,730 Bytes
aab0192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""
models.py -- Pydantic data models for the Scientific Hypothesis Lab.

Follows the OpenEnv spec: Action, Observation, and State base types from
openenv.core.env_server.types.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Optional

from pydantic import Field

try:
    from openenv.core.env_server.types import Action, Observation, State
except ImportError:
    # OpenEnv could not be imported: define minimal local stand-ins with the
    # same names so the models below can still be declared.
    from pydantic import BaseModel

    class Action(BaseModel):  # type: ignore[no-redef]
        """Stand-in action base used only when openenv cannot be imported."""

        # Unknown fields are rejected rather than silently accepted.
        model_config = {"extra": "forbid"}
        metadata: dict[str, Any] = Field(default_factory=dict)

    class Observation(BaseModel):  # type: ignore[no-redef]
        """Stand-in observation base used only when openenv cannot be imported."""

        model_config = {"extra": "forbid"}
        done: bool = False
        # Spelled Optional[...] like every other optional field in this file.
        # Pydantic must evaluate this annotation at class-creation time, and
        # the PEP 604 form `float | None` cannot be evaluated on Python < 3.10.
        reward: Optional[float] = None
        metadata: dict[str, Any] = Field(default_factory=dict)

    class State(BaseModel):  # type: ignore[no-redef]
        """Stand-in state base used only when openenv cannot be imported."""

        # Unlike Action/Observation, extra fields are permitted here.
        model_config = {"extra": "allow"}
        episode_id: Optional[str] = None
        step_count: int = 0


class ExperimentType(str, Enum):
    """Kinds of experiment an agent can ask the environment to run.

    Members are also plain strings, so they compare equal to their values.
    """

    # Takes an exact value to set on the control variable.
    INTERVENTION = "intervention"
    # Takes a [min, max, n_points] range for the control variable.
    CORRELATION = "correlation"
    # Takes a proposed delta as the control value.
    COUNTERFACTUAL = "counterfactual"
    # Does not use a control value.
    PASSIVE = "passive"


class ActionType(str, Enum):
    """The two modes an agent message can take.

    Members are also plain strings, so they compare equal to their values.
    """

    # Run an experiment.
    EXPERIMENT = "experiment"
    # Submit a hypothesis.
    SUBMIT = "submit"


class NoiseLevelTag(str, Enum):
    """Coarse three-step label for a noise level.

    Members are also plain strings, so they compare equal to their values.
    This enum carries labels only; it defines no numeric noise magnitudes.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


class HypLabAction(Action):
    """
    A single message from the agent to the environment.

    ``action_type`` selects one of two modes:
      - EXPERIMENT: run an experiment, which burns one budget step
      - SUBMIT: commit to a hypothesis, which ends the episode

    Only ``action_type`` is required. Every other field defaults to None, and
    this model does not itself check that the fields relevant to the chosen
    mode have been filled in.
    """

    # --- mode selector (required) ---
    action_type: ActionType = Field(
        default=...,
        description="Whether the agent is running an experiment or submitting.",
    )

    # --- experiment parameters ---
    experiment_type: Optional[ExperimentType] = Field(
        default=None,
        description="Which kind of experiment to run.",
    )
    target_variable: Optional[str] = Field(
        default=None,
        description="The variable the agent wants to observe.",
    )
    control_variable: Optional[str] = Field(
        default=None,
        description="The variable the agent is setting or varying.",
    )
    control_value: Optional[float] = Field(
        default=None,
        description=(
            "INTERVENTION: exact value to set. "
            "COUNTERFACTUAL: the proposed delta. "
            "Unused for PASSIVE."
        ),
    )
    control_range: Optional[list[float]] = Field(
        default=None,
        description="CORRELATION only: [min, max, n_points].",
    )

    # --- hypothesis payload ---
    hypothesis_text: Optional[str] = Field(
        default=None,
        description="Free-text statement of discovered rules.",
    )
    hypothesis_equations: Optional[list[str]] = Field(
        default=None,
        description="Structured list of equations, one per rule.",
    )
    # Constrained to the closed interval [0.0, 1.0] when provided.
    confidence: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Agent's self-reported confidence [0,1].",
    )


class HypLabObservation(Observation):
    """
    What the environment returns from reset() or step().

    On top of the fields declared here, `done`, `reward` and `metadata` come
    from the Observation base class. Only ``system_message`` is required;
    everything else has a default.
    """

    # --- general status ---
    system_message: str = Field(
        default=...,
        description="Human-readable description of what just happened.",
    )
    available_variables: list[str] = Field(
        default_factory=list,
        description="Names of all variables in the current hidden world.",
    )
    budget_remaining: int = Field(
        default=0,
        description="Steps left before forced termination.",
    )

    # --- experiment feedback ---
    # NOTE(review): presumably filled in only after an experiment step;
    # confirm against the environment implementation.
    experiment_type_run: Optional[ExperimentType] = None
    control_variable_used: Optional[str] = None
    control_value_used: Optional[Any] = None
    target_variable_observed: Optional[str] = None
    result_value: Optional[Any] = Field(
        default=None,
        description="Noisy observed value(s). Float or list of (x,y) pairs.",
    )
    noise_sigma: Optional[float] = None
    is_redundant: bool = False
    info_gain_reward: float = 0.0

    # --- scoring breakdown ---
    # NOTE(review): presumably filled in only once a hypothesis has been
    # submitted; confirm against the environment implementation.
    # accuracy_score is constrained to [0, 1] when provided.
    accuracy_score: Optional[float] = Field(default=None, ge=0, le=1)
    precision_bonus: Optional[float] = None
    calibration_score: Optional[float] = None
    efficiency_bonus: Optional[float] = None
    contradiction_penalty: Optional[float] = None
    total_episode_reward: Optional[float] = None
    ground_truth_revealed: Optional[str] = None


class HypLabState(State):
    """
    Episode-level bookkeeping snapshot.

    Meant to carry metadata only, never the hidden causal graph itself.
    `episode_id` and `step_count` come from the State base class; every field
    declared here has a default, so an instance can be built with no
    arguments.
    """

    # --- budget ---
    budget_total: int = 0
    budget_remaining: int = 0

    # --- noise settings ---
    noise_level: NoiseLevelTag = NoiseLevelTag.MEDIUM
    noise_sigma: float = 0.20

    # --- world description ---
    domain: str = "unknown"
    n_variables: int = 0

    # --- running history and tallies ---
    # Each instance gets its own fresh list via default_factory.
    experiment_history: list[dict] = Field(default_factory=list)
    cumulative_info_gain: float = 0.0
    redundant_experiment_count: int = 0