| """ |
| Architect Module — Spec2RTL Decomposer Agent |
| ============================================= |
| |
| Based on: Spec2RTL-Agent (arXiv:2405.xxxxx) |
| |
| Before writing any Verilog, this module reads the input specification (text/PDF) |
| and produces a Structured Information Dictionary (SID) in JSON format. |
| |
| The SID explicitly defines: |
| - Top-level module name, parameters, ports |
| - Sub-module names, inputs, outputs, and functional logic |
| - FSM state maps, datapath descriptions, timing constraints |
| - Interface protocols and reset strategy |
| |
| This JSON contract becomes the SINGLE SOURCE OF TRUTH for all downstream agents |
| (Coder, Verifier, Debugger) — eliminating ambiguity and hallucination. |
| """ |
|
|
| import json |
| import re |
| import logging |
| import os |
| from dataclasses import dataclass, field, asdict |
| from typing import Any, Dict, List, Optional, Tuple |
| from crewai import Agent, Task, Crew, LLM |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
| |
|
|
@dataclass
class PortDef:
    """
    Description of one module port.

    All fields are stored as plain strings exactly as supplied; nothing is
    validated or normalised here.
    """
    name: str               # port identifier
    direction: str          # e.g. "input"
    width: str              # kept as text, e.g. "1"
    description: str = ""   # optional free-text note
    reset_value: str = ""   # optional; empty string means "not specified"
|
|
|
|
@dataclass
class ParameterDef:
    """
    One named parameter together with its default value.

    Both the name and the default are held as text; the description is
    optional.
    """
    name: str               # parameter identifier
    default: str            # default value, kept as a string
    description: str = ""   # optional free-text note
|
|
|
|
@dataclass
class FSMStateDef:
    """
    One state of a finite-state machine.

    ``transitions`` is a list of string-to-string dicts and ``outputs`` is a
    string-to-string dict; each instance gets its own fresh containers.
    """
    name: str                # state identifier
    encoding: str = ""       # optional state encoding, kept as text
    description: str = ""    # optional free-text note
    transitions: List[Dict[str, str]] = field(default_factory=list)
    outputs: Dict[str, str] = field(default_factory=dict)
|
|
|
|
@dataclass
class SubModuleDef:
    """
    Definition of a single module; the top-level module is described with
    this same record.

    Only ``name`` is required. Every list-valued field defaults to a fresh
    empty list and every text field to the empty string.
    """
    name: str
    description: str = ""
    # Module-local parameters and the port list.
    parameters: List[ParameterDef] = field(default_factory=list)
    ports: List[PortDef] = field(default_factory=list)
    # Behaviour as free text, plus an optional RTL skeleton string.
    functional_logic: str = ""
    rtl_skeleton: str = ""
    # Optional structural detail.
    fsm_states: List[FSMStateDef] = field(default_factory=list)
    internal_signals: List[Dict[str, str]] = field(default_factory=list)
    instantiates: List[str] = field(default_factory=list)
|
|
|
|
@dataclass
class StructuredSpecDict:
    """
    Complete Structured Information Dictionary (SID) for a chip design.

    This is the JSON contract handed along the Architect → Coder → Verifier
    pipeline. The first four fields are required; everything else has a
    default.
    """
    design_name: str
    chip_family: str
    description: str
    top_module: str
    reset_style: str = "sync"
    clock_name: str = "clk"
    reset_name: str = "rst_n"
    reset_polarity: str = "active_low"
    parameters: List[ParameterDef] = field(default_factory=list)
    sub_modules: List[SubModuleDef] = field(default_factory=list)
    interface_protocol: str = ""
    timing_notes: str = ""
    verification_hints: List[str] = field(default_factory=list)

    def to_json(self) -> str:
        """Serialise the SID, including nested dataclasses, to indented JSON."""
        return json.dumps(asdict(self), indent=2)

    @staticmethod
    def _known_kwargs(target: Any, raw: Any) -> Dict[str, Any]:
        """
        Return a copy of ``raw`` restricted to the dataclass fields of ``target``.

        Keys that ``target`` does not declare are dropped, so one stray key in
        the JSON no longer aborts deserialisation.

        Raises:
            TypeError: if ``raw`` is not a JSON object (dict).
        """
        # Function-scope import: the module only imports dataclass/field/asdict.
        from dataclasses import fields

        if not isinstance(raw, dict):
            raise TypeError(
                f"Expected a JSON object for {target.__name__}, "
                f"got {type(raw).__name__}"
            )
        allowed = {f.name for f in fields(target)}
        return {key: value for key, value in raw.items() if key in allowed}

    @classmethod
    def from_json(cls, json_str: str) -> "StructuredSpecDict":
        """
        Build a SID from its JSON text.

        Nested ``parameters``, ``ports`` and ``fsm_states`` entries are
        converted into their dataclasses. Unknown keys are ignored at every
        level and a ``null`` list is treated as an empty list.

        Raises:
            json.JSONDecodeError: if ``json_str`` is not valid JSON.
            TypeError: if a required field is missing, or an entry that must
                be a JSON object is something else.
        """
        top = cls._known_kwargs(cls, json.loads(json_str))

        # ``or []`` covers both a missing key and an explicit JSON null.
        top["parameters"] = [
            ParameterDef(**cls._known_kwargs(ParameterDef, p))
            for p in top.get("parameters") or []
        ]

        subs = []
        for raw_sm in top.get("sub_modules") or []:
            sm = cls._known_kwargs(SubModuleDef, raw_sm)
            sm["parameters"] = [
                ParameterDef(**cls._known_kwargs(ParameterDef, p))
                for p in sm.get("parameters") or []
            ]
            sm["ports"] = [
                PortDef(**cls._known_kwargs(PortDef, p))
                for p in sm.get("ports") or []
            ]
            sm["fsm_states"] = [
                FSMStateDef(**cls._known_kwargs(FSMStateDef, s))
                for s in sm.get("fsm_states") or []
            ]
            subs.append(SubModuleDef(**sm))
        top["sub_modules"] = subs

        return cls(**top)

    def validate(self) -> Tuple[bool, List[str]]:
        """
        Validate the SID for completeness and consistency.

        Returns:
            ``(ok, errors)`` where ``ok`` is True only when ``errors`` is empty.
        """
        errors: List[str] = []
        if not self.design_name:
            errors.append("design_name is empty")
        if not self.top_module:
            errors.append("top_module is empty")
        if not self.sub_modules:
            errors.append("No sub_modules defined")
        for sm in self.sub_modules:
            if not sm.name:
                errors.append("Sub-module has empty name")
            if not sm.ports:
                errors.append(f"Sub-module '{sm.name}' has no ports")
            if not sm.functional_logic:
                errors.append(f"Sub-module '{sm.name}' has no functional_logic")

            # A module that declares FSM states must expose the design clock.
            port_names = {p.name for p in sm.ports}
            if sm.fsm_states and self.clock_name not in port_names:
                errors.append(f"Sub-module '{sm.name}' has FSM but no '{self.clock_name}' port")
        return len(errors) == 0, errors
|
|
|
|
| |
|
|
# System prompt for the decomposition task. ``{schema}`` is the only
# str.format placeholder; no other braces may appear in this template.
DECOMPOSE_SYSTEM_PROMPT = """\
You are a Principal VLSI Architect performing Spec-to-RTL decomposition.

TASK: Given a natural-language chip specification, produce a COMPLETE Structured
Information Dictionary (SID) in **valid JSON format**.

The JSON MUST follow this EXACT schema:
{schema}

MANDATORY RULES:
1. Every module (including top-level) MUST appear in "sub_modules" with ALL fields populated.
2. Every sub-module MUST have at minimum: name, ports (with direction and width), functional_logic.
3. For sequential designs, clk and rst_n ports are MANDATORY.
4. FSM modules MUST list ALL states with transitions and outputs.
5. Use "parameters" for configurable widths/depths — NEVER hardcode magic numbers.
6. "functional_logic" must be a CONCISE (under 100 words) specification of the behavior. DO NOT generate Verilog skeletons in this JSON.
7. CRITICAL JSON RULES: You are generating a massive JSON object. You MUST double check your syntax. NEVER use unescaped quotes inside strings. NEVER leave trailing commas before closing braces. Ensure all objects and arrays are properly closed.
8. Limit the JSON size by omitting any unnecessary commentary, and avoiding massive unneeded string literals.
9. IF THE DESIGN IS MASSIVE (e.g. CPUs, SoCs, Superscalar systems): You MUST OMIT the `fsm_states` and `internal_signals` arrays entirely to save tokens. The Designer module will independently infer those.
"""
|
|
# User-side prompt template; filled with str.format using the
# ``design_name`` and ``spec_text`` placeholders.
DECOMPOSE_USER_PROMPT = (
    "DESIGN NAME: {design_name}\n"
    "SPECIFICATION: {spec_text}\n"
    "\n"
    "Produce the complete Structured Information Dictionary (JSON) for this chip design.\n"
    "Decompose into sub-modules where architecturally appropriate (e.g., separate datapath,\n"
    "controller, interface adapter). For simple designs, a single top-level module suffices.\n"
)
|
|
|
|
| |
|
|
class ArchitectModule:
    """
    Spec2RTL Decomposer Agent.

    Reads a natural language specification and produces a StructuredSpecDict
    (JSON) that defines every sub-module, port, parameter, and FSM state
    BEFORE any Verilog is written.
    """

    # Only this many leading characters of the spec are embedded in the prompt.
    _MAX_SPEC_CHARS = 12000

    # Human-readable schema that is substituted into the system prompt.
    _SCHEMA_DESC = json.dumps({
        "design_name": "str",
        "chip_family": "str (counter|ALU|FIFO|FSM|UART|SPI|AXI|crypto|processor|SoC|...)",
        "description": "str",
        "top_module": "str (Verilog identifier)",
        "reset_style": "sync|async",
        "clock_name": "str",
        "reset_name": "str",
        "reset_polarity": "active_low|active_high",
        "parameters": [{"name": "str", "default": "str", "description": "str"}],
        "sub_modules": [{
            "name": "str (Verilog identifier)",
            "description": "str",
            "parameters": [{"name": "str", "default": "str", "description": "str"}],
            "ports": [{"name": "str", "direction": "input|output",
                       "width": "str", "description": "str", "reset_value": "str"}],
            "functional_logic": "CONCISE natural-language description of behavior (Max 100 words)",
            "fsm_states": [{"name": "str", "encoding": "str", "description": "str",
                            "transitions": [{"condition": "str", "next_state": "str"}],
                            "outputs": {"signal": "value"}}],
            "internal_signals": [{"name": "str", "width": "str", "purpose": "str"}],
            "instantiates": ["sub_module_name"]
        }],
        "interface_protocol": "str",
        "timing_notes": "str",
        "verification_hints": ["str"]
    }, indent=2)

    def __init__(self, llm: LLM, verbose: bool = False, max_retries: int = 3):
        """
        Args:
            llm: LLM handle passed to every Agent created by ``decompose``.
            verbose: Forwarded to the Agent's ``verbose`` flag.
            max_retries: Number of decomposition attempts before falling back.
        """
        self.llm = llm
        self.verbose = verbose
        self.max_retries = max_retries

    def decompose(self, design_name: str, spec_text: str,
                  save_path: Optional[str] = None) -> StructuredSpecDict:
        """
        Main entry point: decompose a natural-language spec into a StructuredSpecDict.

        Up to ``max_retries`` attempts are made; the error from a failed
        attempt is appended to the next attempt's prompt. If every attempt
        fails, a minimal fallback SID is returned instead of raising.

        Args:
            design_name: Verilog-safe design name.
            spec_text: Natural language specification (or existing MAS).
            save_path: Optional path to save the JSON artifact. A bare
                filename (no directory component) is accepted.

        Returns:
            A StructuredSpecDict that passed validation, or the fallback SID.
        """
        logger.info(f"[Architect] Decomposing spec for '{design_name}'")

        # Truncation used to be silent; make the data loss visible.
        if len(spec_text) > self._MAX_SPEC_CHARS:
            logger.warning(
                f"[Architect] Spec is {len(spec_text)} chars; only the first "
                f"{self._MAX_SPEC_CHARS} are sent to the LLM"
            )

        system_prompt = DECOMPOSE_SYSTEM_PROMPT.format(schema=self._SCHEMA_DESC)
        user_prompt = DECOMPOSE_USER_PROMPT.format(
            design_name=design_name,
            spec_text=spec_text[:self._MAX_SPEC_CHARS],
        )

        sid = None
        last_error = ""

        for attempt in range(1, self.max_retries + 1):
            logger.info(f"[Architect] Decompose attempt {attempt}/{self.max_retries}")

            # Feed the previous failure back so the model can correct itself.
            retry_context = ""
            if last_error:
                retry_context = (
                    f"\n\nPREVIOUS ATTEMPT FAILED WITH:\n{last_error}\n"
                    "Fix the issues and return a corrected JSON. Ensure there are no trailing commas and double quotes are escaped."
                )

            # A fresh Agent/Task pair per attempt, as in the original flow.
            agent = Agent(
                role="Spec2RTL Decomposer",
                goal=f"Produce a complete Structured Information Dictionary for {design_name}",
                backstory=(
                    "You are a world-class VLSI architect who converts natural-language "
                    "chip specifications into precise, machine-readable JSON contracts. "
                    "You never leave fields empty or use placeholders."
                ),
                llm=self.llm,
                verbose=self.verbose,
            )

            task = Task(
                description=system_prompt + "\n\n" + user_prompt + retry_context,
                expected_output="Valid JSON matching the Structured Information Dictionary schema",
                agent=agent,
            )

            try:
                raw = str(Crew(agents=[agent], tasks=[task]).kickoff())
                candidate = self._parse_response(raw, design_name)
                ok, errs = candidate.validate()
            except Exception as e:
                # Retry boundary: any LLM, parsing or construction failure
                # becomes feedback for the next attempt.
                last_error = f"Parse/execution error: {str(e)}"
                logger.warning(f"[Architect] Attempt {attempt} failed: {e}")
                continue

            if not ok:
                last_error = "Validation errors:\n" + "\n".join(f" - {e}" for e in errs)
                logger.warning(f"[Architect] Validation failed: {errs}")
                continue

            sid = candidate
            logger.info(f"[Architect] Successfully decomposed into "
                        f"{len(sid.sub_modules)} sub-modules")
            break

        if sid is None:
            logger.warning("[Architect] All attempts failed — generating fallback SID")
            sid = self._fallback_sid(design_name, spec_text)

        if save_path:
            self._write_sid(sid, save_path)
            logger.info(f"[Architect] SID saved to {save_path}")

        return sid

    @staticmethod
    def _write_sid(sid: StructuredSpecDict, save_path: str) -> None:
        """Write ``sid.to_json()`` to ``save_path``, creating parent directories."""
        parent = os.path.dirname(save_path)
        # dirname is "" for a bare filename, and os.makedirs("") raises.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(sid.to_json())

    @staticmethod
    def _extract_json_object(raw: str) -> Dict[str, Any]:
        """
        Pull the first parseable JSON object out of an LLM reply.

        Each markdown-fenced block is tried in order, then the whole reply.
        Within a candidate, the text from the first '{' to the last '}' is
        parsed when such a span exists.

        Raises:
            json.JSONDecodeError: the first decode error, when no candidate parses.
            ValueError: when text parsed but none of it was a JSON object.
        """
        text = raw.strip()
        candidates = [
            block.strip()
            for block in re.findall(r'```(?:json)?\s*([\s\S]*?)```', text)
        ]
        candidates.append(text)

        first_error = None
        for candidate in candidates:
            brace_start = candidate.find('{')
            brace_end = candidate.rfind('}')
            if brace_start >= 0 and brace_end > brace_start:
                candidate = candidate[brace_start:brace_end + 1]
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as err:
                if first_error is None:
                    first_error = err
                continue
            if isinstance(parsed, dict):
                return parsed

        if first_error is not None:
            raise first_error
        raise ValueError("LLM response did not contain a JSON object")

    def _parse_response(self, raw: str, design_name: str) -> StructuredSpecDict:
        """
        Extract JSON from LLM response (may contain markdown fences).

        A missing or empty ``design_name`` / ``top_module`` is filled in with
        ``design_name`` before the SID is built.
        """
        data = self._extract_json_object(raw)

        if not data.get("design_name"):
            data["design_name"] = design_name
        if not data.get("top_module"):
            data["top_module"] = design_name

        return StructuredSpecDict.from_json(json.dumps(data))

    def _fallback_sid(self, design_name: str, spec_text: str) -> StructuredSpecDict:
        """
        Generate a minimal SID when LLM decomposition fails.

        The result has a single module named ``design_name`` with only clock
        and reset ports; the first 2000 characters of the spec are reused as
        description and functional logic.
        """
        excerpt = spec_text[:2000]
        return StructuredSpecDict(
            design_name=design_name,
            chip_family="unknown",
            description=excerpt,
            top_module=design_name,
            reset_style="sync",
            parameters=[],
            sub_modules=[
                SubModuleDef(
                    name=design_name,
                    description=excerpt,
                    ports=[
                        PortDef(name="clk", direction="input", width="1", description="System clock"),
                        PortDef(name="rst_n", direction="input", width="1", description="Active-low reset"),
                    ],
                    functional_logic=excerpt,
                )
            ],
            verification_hints=["Requires manual specification review — auto-decomposition failed"],
        )

    def enrich_with_pdf(self, pdf_path: str) -> str:
        """
        Extract text from a PDF specification document.

        Runs the external ``pdftotext`` tool when it is usable. Otherwise the
        file is read as UTF-8 text with undecodable bytes ignored.

        Returns:
            The extracted text, or "" when the file cannot be read.
        """
        # Function-scope import: subprocess is only needed here.
        import subprocess

        try:
            result = subprocess.run(
                ["pdftotext", "-layout", pdf_path, "-"],
                capture_output=True, text=True, timeout=30,
                # Decode explicitly so a non-UTF-8 locale cannot raise here.
                encoding="utf-8", errors="replace",
            )
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout
        except (OSError, subprocess.TimeoutExpired):
            # Tool missing, not executable, or hung: use the plain-text fallback.
            pass

        try:
            with open(pdf_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        except OSError as err:
            logger.warning(f"[Architect] Could not read '{pdf_path}': {err}")
            return ""

    def sid_to_rtl_prompt(self, sid: StructuredSpecDict) -> str:
        """
        Convert a SID into a detailed RTL generation prompt.

        This is what gets fed to the Coder agent — a plain-text rendering of
        the JSON contract. Optional parts (timing notes, parameters, internal
        signals, FSM states, instantiations, verification hints) are emitted
        only when present.

        Returns:
            The prompt sections joined with newlines.
        """
        sections = [
            f"# RTL Specification for {sid.top_module}",
            f"Chip Family: {sid.chip_family}",
            f"Description: {sid.description}",
            f"Clock: {sid.clock_name}",
            f"Reset: {sid.reset_style} ({sid.reset_polarity})",
            f"Reset Signal: {sid.reset_name}",
            f"Interface: {sid.interface_protocol or 'custom'}",
        ]
        if sid.timing_notes:
            sections.append(f"Timing Notes: {sid.timing_notes}")

        if sid.parameters:
            sections.append("\n## Global Parameters")
            for p in sid.parameters:
                sections.append(f" parameter {p.name} = {p.default} // {p.description}")

        for sm in sid.sub_modules:
            sections.append(f"\n## Module: {sm.name}")
            sections.append(f" Description: {sm.description}")

            if sm.parameters:
                sections.append(" Parameters:")
                for p in sm.parameters:
                    sections.append(f" parameter {p.name} = {p.default} // {p.description}")

            sections.append(" Ports:")
            for p in sm.ports:
                rv = f" (reset: {p.reset_value})" if p.reset_value else ""
                sections.append(f" {p.direction} [{p.width}] {p.name} — {p.description}{rv}")

            sections.append(f" Functional Logic:\n {sm.functional_logic}")

            if sm.internal_signals:
                sections.append(" Internal Signals:")
                for sig in sm.internal_signals:
                    sections.append(
                        f" [{sig.get('width')}] {sig.get('name')} — {sig.get('purpose')}"
                    )

            if sm.fsm_states:
                sections.append(" FSM States:")
                for s in sm.fsm_states:
                    sections.append(f" {s.name}: {s.description}")
                    # ``or`` guards against a JSON null in place of the container.
                    for t in s.transitions or []:
                        sections.append(f" → {t.get('next_state')} when {t.get('condition')}")
                    for signal, value in (s.outputs or {}).items():
                        sections.append(f" output {signal} = {value}")

            if sm.instantiates:
                sections.append(f" Instantiates: {', '.join(sm.instantiates)}")

        if sid.verification_hints:
            sections.append("\n## Verification Hints")
            for h in sid.verification_hints:
                sections.append(f" - {h}")

        return "\n".join(sections)
|
|