# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for the ChipForge RTL Debugging Environment.
Designed for RL training of LLMs:
- Observation is a self-contained Markov state (always includes RTL code)
- Reward uses potential-based shaping for dense per-step signal
- Action result feedback at every step
"""
from typing import Any, Dict, Literal, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field, model_validator
# Closed set of action names accepted by ChipforgeAction.action_type.
ActionType = Literal[
    # Viewing actions
    "view_design",
    "view_testbench",
    "view_synthesis_log",
    "view_lint_log",
    "view_simulation_log",
    # Tool-run actions
    "run_simulation",
    "run_synthesis",
    "run_lint",
    # File-modifying actions (the only ones allowed to carry
    # line_number / end_line_number / new_content)
    "edit_line",
    "append_line",
    "insert_lines",
    "replace_lines",
    "write_file",
    # Episode-ending action
    "submit",
]
class ChipforgeAction(Action):
    """Action for the ChipForge environment.
    Supported action_types:
    - view_design: View the design (RTL) code
    - view_testbench: View the testbench code
    - view_synthesis_log: View synthesis log (only if run_synthesis was executed)
    - view_lint_log: View lint log (only if run_lint was executed)
    - view_simulation_log: View simulation log (only if run_simulation was executed)
    - run_simulation: Compile and simulate with Verilator
    - run_synthesis: Synthesize with Yosys
    - run_lint: Run Verilator lint checks
    - edit_line: Replace a single line (requires target, line_number + new_content)
    - append_line: Append one new line (requires target, new_content)
    - insert_lines: Insert multiple lines starting at line_number (requires target, line_number + new_content)
    - replace_lines: Replace multiple lines from line_number to end_line_number with new_content (requires target)
    - write_file: Write the entire file (requires target and new_content)
    - submit: Submit current RTL as the final solution
    """

    action_type: ActionType = Field(..., description="Type of action to execute")
    target: Literal["design", "testbench"] = Field(
        default="design",
        description="Target file for the edit ('design' or 'testbench'). Required for edit/append/insert/replace actions.",
    )
    line_number: Optional[int] = Field(
        default=None,
        description="Line number to edit (1-indexed). Required for edit_line, insert_lines, replace_lines.",
    )
    end_line_number: Optional[int] = Field(
        default=None,
        description="End line number to replace (1-indexed). Required for replace_lines.",
    )
    new_content: Optional[str] = Field(
        default=None,
        description="New content. Required for edit, append, insert, and replace actions.",
    )

    @model_validator(mode="after")
    def validate_action_payload(self) -> "ChipforgeAction":
        """Check that the payload fields match the chosen ``action_type``.

        Rules enforced:
        - edit_line / insert_lines: ``line_number`` and ``new_content`` must be set.
        - replace_lines: ``line_number``, ``end_line_number`` and ``new_content``
          must be set.
        - append_line / write_file: ``new_content`` must be set and neither
          ``line_number`` nor ``end_line_number`` may be set.
        - every other action: none of the three payload fields may be set.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If the payload does not satisfy the rule for its
                ``action_type``.
        """
        has_line = self.line_number is not None
        has_end_line = self.end_line_number is not None
        has_content = self.new_content is not None
        # ``target`` needs no check here: it is a Literal field with a default,
        # so field validation has already restricted it to "design"/"testbench"
        # before this mode="after" validator runs.

        # Action types are mutually exclusive, so a single chain covers all cases.
        if self.action_type == "edit_line":
            if not (has_line and has_content):
                raise ValueError("edit_line requires target, line_number and new_content")
        elif self.action_type == "insert_lines":
            if not (has_line and has_content):
                raise ValueError("insert_lines requires target, line_number and new_content")
        elif self.action_type == "replace_lines":
            if not (has_line and has_end_line and has_content):
                raise ValueError("replace_lines requires target, line_number, end_line_number, and new_content")
        elif self.action_type == "append_line":
            # "only" means no positional fields at all; end_line_number is
            # rejected too, not just line_number.
            if not has_content or has_line or has_end_line:
                raise ValueError(
                    "append_line requires target and new_content only"
                )
        elif self.action_type == "write_file":
            # Same rule as append_line: whole-file writes take no line range.
            if not has_content or has_line or has_end_line:
                raise ValueError(
                    "write_file requires target and new_content only"
                )
        elif has_line or has_end_line or has_content:
            # View / run / submit actions carry no edit payload.
            raise ValueError(
                "line_number/end_line_number/new_content are only valid for edit/append/insert/replace/write actions"
            )
        return self
class ChipforgeObservation(Observation):
    """Observation returned by the ChipForge environment.
    Designed as a self-contained Markov state for RL training.
    Always includes the current design code.
    Tool logs are only populated when explicitly requested via:
    - view_synthesis_log: Shows synthesis logs from last run
    - view_lint_log: Shows lint logs from last run
    - view_simulation_log: Shows simulation logs from last run
    """

    # -- Always populated (Markov state core) ------------------------------
    # Every field below has a default, so an observation can be constructed
    # with no arguments; the environment is expected to fill them in.
    design_code: str = Field(
        default="",
        description="Current design code with line numbers (always present)",
    )
    # Each tool status starts at "not_run"; the allowed values differ per tool
    # (simulation: pass/fail, synthesis: pass/warning, lint: clean/warning).
    sim_status: Literal["not_run", "pass", "fail", "error"] = Field(
        default="not_run",
        description="Latest simulation status for current design snapshot",
    )
    synth_status: Literal["not_run", "pass", "warning", "error"] = Field(
        default="not_run",
        description="Latest synthesis status for current design snapshot",
    )
    lint_status: Literal["not_run", "clean", "warning", "error"] = Field(
        default="not_run",
        description="Latest lint status for current design snapshot",
    )
    error_summary: str = Field(
        default="",
        description="One-line summary of the most relevant diagnostic",
    )
    task_description: str = Field(
        default="",
        description="Natural language description of the loaded debug task",
    )
    # Action feedback (what just happened)
    last_action: str = Field(
        default="reset", description="The action that produced this observation"
    )
    action_result: str = Field(
        default="",
        description="Human-readable result of the last action taken",
    )
    # -- Conditionally populated (verbose action-specific payload) ---------
    testbench_code: str = Field(
        default="", description="Testbench code (populated by view_testbench)"
    )
    # NOTE: the 2000-char truncation is only stated in the description; this
    # model declares no length constraint or validator, so the producer of the
    # observation must do the truncating.
    log_output: str = Field(
        default="",
        description="Tool output log, truncated to 2000 chars (populated by view_synthesis_log, view_lint_log, view_simulation_log, or tool runs)",
    )
    # default_factory gives each instance its own dict.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Optional extra machine-readable fields for clients/prompts",
    )
    # -- RL signals --------------------------------------------------------
    step_count: int = Field(default=0, description="Steps taken so far")
    max_steps: int = Field(default=20, description="Maximum allowed steps")
    # NOTE: the 0.0-1.0 range is documented only; no ge/le bound is declared
    # on the field, so out-of-range values are not rejected here.
    cumulative_reward: float = Field(
        default=0.0,
        description="Total quality score so far (0.0 to 1.0)",
    )
|