File size: 7,638 Bytes
d8ff16a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cca9a63
d8ff16a
 
 
 
 
 
 
 
 
cca9a63
 
 
d8ff16a
 
 
 
 
 
 
 
cca9a63
d8ff16a
 
 
 
 
 
 
cca9a63
 
 
 
 
d8ff16a
 
 
 
cca9a63
 
 
 
d8ff16a
 
cca9a63
d8ff16a
cca9a63
d8ff16a
cca9a63
d8ff16a
cca9a63
d8ff16a
cca9a63
d8ff16a
 
 
 
 
 
cca9a63
 
 
d8ff16a
cca9a63
d8ff16a
cca9a63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8ff16a
cca9a63
d8ff16a
cca9a63
d8ff16a
 
cca9a63
 
d8ff16a
 
cca9a63
d8ff16a
 
 
 
 
 
 
 
cca9a63
d8ff16a
 
 
 
 
 
 
cca9a63
d8ff16a
cca9a63
d8ff16a
 
 
cca9a63
d8ff16a
 
 
cca9a63
d8ff16a
 
 
cca9a63
d8ff16a
 
 
 
 
 
 
cca9a63
d8ff16a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the ChipForge RTL Debugging Environment.

Designed for RL training of LLMs:
  - Observation is a self-contained Markov state (always includes RTL code)
  - Reward uses potential-based shaping for dense per-step signal
  - Action result feedback at every step
"""

from typing import Any, Dict, Literal, Optional

from openenv.core.env_server.types import Action, Observation
from pydantic import Field, model_validator

# Closed set of action names accepted by ``ChipforgeAction.action_type``.
ActionType = Literal[
    # Viewing: show a source file or the log of a previous tool run.
    "view_design",
    "view_testbench",
    "view_synthesis_log",
    "view_lint_log",
    "view_simulation_log",
    # Tool runs: simulation, synthesis and lint.
    "run_simulation",
    "run_synthesis",
    "run_lint",
    # Edits: modify the target file; these carry line_number / new_content
    # payloads that ChipforgeAction validates.
    "edit_line",
    "append_line",
    "insert_lines",
    "replace_lines",
    "write_file",
    # Terminal action: submit the current RTL as the final solution.
    "submit",
]


class ChipforgeAction(Action):
    """Action for the ChipForge environment.

    Supported action_types:
        - view_design: View the design (RTL) code
        - view_testbench: View the testbench code
        - view_synthesis_log: View synthesis log (only if run_synthesis was executed)
        - view_lint_log: View lint log (only if run_lint was executed)
        - view_simulation_log: View simulation log (only if run_simulation was executed)
        - run_simulation: Compile and simulate with Verilator
        - run_synthesis: Synthesize with Yosys
        - run_lint: Run Verilator lint checks
        - edit_line: Replace a single line (requires target, line_number + new_content)
        - append_line: Append one new line (requires target, new_content)
        - insert_lines: Insert multiple lines starting at line_number (requires target, line_number + new_content)
        - replace_lines: Replace multiple lines from line_number to end_line_number with new_content (requires target)
        - write_file: Write the entire file (requires target and new_content)
        - submit: Submit current RTL as the final solution
    """

    # Which action to perform; the only field without a default.
    action_type: ActionType = Field(..., description="Type of action to execute")
    # File the edit applies to; defaults to the design file.
    target: Literal["design", "testbench"] = Field(
        default="design",
        description="Target file for the edit ('design' or 'testbench'). Required for edit/append/insert/replace actions.",
    )
    # Payload fields below default to None; validate_action_payload enforces
    # which of them must (or must not) be set for each action_type.
    line_number: Optional[int] = Field(
        default=None,
        description="Line number to edit (1-indexed). Required for edit_line, insert_lines, replace_lines.",
    )
    end_line_number: Optional[int] = Field(
        default=None,
        description="End line number to replace (1-indexed). Required for replace_lines.",
    )
    new_content: Optional[str] = Field(
        default=None,
        description="New content. Required for edit, append, insert, and replace actions.",
    )

    @model_validator(mode="after")
    def validate_action_payload(self) -> "ChipforgeAction":
        """Check that the payload fields match what ``action_type`` expects.

        Rules:
            - edit_line / insert_lines: ``line_number`` and ``new_content``
              must be set (``end_line_number`` is not checked for these two).
            - replace_lines: ``line_number``, ``end_line_number`` and
              ``new_content`` must all be set.
            - append_line / write_file: ``new_content`` must be set, and
              neither ``line_number`` nor ``end_line_number`` may be set.
            - any other action: none of ``line_number``, ``end_line_number``
              or ``new_content`` may be set.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If the payload does not satisfy the rules above.
        """
        kind = self.action_type
        has_line = self.line_number is not None
        has_end_line = self.end_line_number is not None
        has_content = self.new_content is not None
        # The Literal annotation on ``target`` already limits it to these two
        # values; the check is kept as a defensive guard.
        has_target = self.target in ("design", "testbench")

        if kind in ("edit_line", "insert_lines"):
            if not (has_target and has_line and has_content):
                raise ValueError(
                    f"{kind} requires target, line_number and new_content"
                )
        elif kind == "replace_lines":
            if not (has_target and has_line and has_end_line and has_content):
                raise ValueError(
                    "replace_lines requires target, line_number, end_line_number, and new_content"
                )
        elif kind in ("append_line", "write_file"):
            # "only" means no positional payload at all: a stray
            # end_line_number is rejected just like a stray line_number.
            if not (has_target and has_content) or has_line or has_end_line:
                raise ValueError(f"{kind} requires target and new_content only")
        elif has_line or has_end_line or has_content:
            # view_* / run_* / submit carry no payload.
            raise ValueError(
                "line_number/end_line_number/new_content are only valid for edit/append/insert/replace/write actions"
            )
        return self


class ChipforgeObservation(Observation):
    """Observation returned by the ChipForge environment.

    Designed as a self-contained Markov state for RL training.
    Always includes the current design code.
    Tool logs are only populated when explicitly requested via:
        - view_synthesis_log: Shows synthesis logs from last run
        - view_lint_log: Shows lint logs from last run
        - view_simulation_log: Shows simulation logs from last run
    """

    # --- Core state: present on every observation ---------------------------
    design_code: str = Field(
        "", description="Current design code with line numbers (always present)"
    )
    # Per-tool status flags; each starts at "not_run".
    sim_status: Literal["not_run", "pass", "fail", "error"] = Field(
        "not_run",
        description="Latest simulation status for current design snapshot",
    )
    synth_status: Literal["not_run", "pass", "warning", "error"] = Field(
        "not_run",
        description="Latest synthesis status for current design snapshot",
    )
    lint_status: Literal["not_run", "clean", "warning", "error"] = Field(
        "not_run",
        description="Latest lint status for current design snapshot",
    )
    error_summary: str = Field(
        "", description="One-line summary of the most relevant diagnostic"
    )
    task_description: str = Field(
        "", description="Natural language description of the loaded debug task"
    )

    # --- Feedback about the action that led to this observation -------------
    last_action: str = Field(
        "reset",
        description="The action that produced this observation",
    )
    action_result: str = Field(
        "", description="Human-readable result of the last action taken"
    )

    # --- Verbose payloads: empty unless the action fills them in ------------
    testbench_code: str = Field(
        "",
        description="Testbench code (populated by view_testbench)",
    )
    log_output: str = Field(
        "",
        description="Tool output log, truncated to 2000 chars (populated by view_synthesis_log, view_lint_log, view_simulation_log, or tool runs)",
    )
    # default_factory gives every instance its own dict.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Optional extra machine-readable fields for clients/prompts",
    )

    # --- RL bookkeeping ------------------------------------------------------
    step_count: int = Field(0, description="Steps taken so far")
    max_steps: int = Field(20, description="Maximum allowed steps")
    cumulative_reward: float = Field(
        0.0, description="Total quality score so far (0.0 to 1.0)"
    )