# AgentIC/src/agentic/core/architect.py
# Revision 0ed3642 (vxkyyy): "feat: update LLM model references, change some prompts"
"""
Architect Module — Spec2RTL Decomposer Agent
=============================================
Based on: Spec2RTL-Agent (arXiv:2405.xxxxx)
Before writing any Verilog, this module reads the input specification (text/PDF)
and produces a Structured Information Dictionary (SID) in JSON format.
The SID explicitly defines:
- Top-level module name, parameters, ports
- Sub-module names, inputs, outputs, and functional logic
- FSM state maps, datapath descriptions, timing constraints
- Interface protocols and reset strategy
This JSON contract becomes the SINGLE SOURCE OF TRUTH for all downstream agents
(Coder, Verifier, Debugger) — eliminating ambiguity and hallucination.
"""
import json
import re
import logging
import os
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional, Tuple
from crewai import Agent, Task, Crew, LLM
logger = logging.getLogger(__name__)
# ─── Structured Information Dictionary Schema ────────────────────────
@dataclass
class PortDef:
    """Describe one port of a module: name, direction, width and docs."""

    name: str
    # Port direction: "input" | "output" | "inout".
    direction: str
    # Width kept as text so it can name a parameter, e.g. "8", "DATA_WIDTH", "1".
    width: str
    description: str = ""
    # Reset value; only meaningful for output registers.
    reset_value: str = ""
@dataclass
class ParameterDef:
    """A configurable parameter: its name, default value and description."""

    name: str
    # Default value, stored as text.
    default: str
    description: str = ""
@dataclass
class FSMStateDef:
    """One state of a finite-state machine, with its transitions and outputs."""

    name: str
    encoding: str = ""
    description: str = ""
    # Each transition is a string-to-string mapping; a fresh list per instance.
    transitions: List[Dict[str, str]] = field(default_factory=list)
    # String-to-string mapping of outputs; a fresh dict per instance.
    outputs: Dict[str, str] = field(default_factory=dict)
@dataclass
class SubModuleDef:
    """A single module of the design; the top-level module is described this way too."""

    name: str
    description: str = ""
    parameters: List[ParameterDef] = field(default_factory=list)
    ports: List[PortDef] = field(default_factory=list)
    # Natural-language description of the module's behaviour.
    functional_logic: str = ""
    # Optional Verilog skeleton.
    rtl_skeleton: str = ""
    fsm_states: List[FSMStateDef] = field(default_factory=list)
    internal_signals: List[Dict[str, str]] = field(default_factory=list)
    # Names of the sub-modules this module instantiates.
    instantiates: List[str] = field(default_factory=list)
@dataclass
class StructuredSpecDict:
    """
    Complete Structured Information Dictionary (SID) for a chip design.

    This is the JSON contract between the Architect → Coder → Verifier pipeline.
    """

    design_name: str
    chip_family: str  # e.g. "counter", "FIFO", "UART", "AES", "RISC-V"
    description: str
    top_module: str
    reset_style: str = "sync"  # "sync" | "async"
    clock_name: str = "clk"
    reset_name: str = "rst_n"
    reset_polarity: str = "active_low"
    parameters: List[ParameterDef] = field(default_factory=list)
    sub_modules: List[SubModuleDef] = field(default_factory=list)
    interface_protocol: str = ""  # "AXI4-Stream" | "APB" | "wishbone" | "custom"
    timing_notes: str = ""
    verification_hints: List[str] = field(default_factory=list)  # Hints for TB agent

    def to_json(self) -> str:
        """Serialise the SID, including nested dataclasses, to indented JSON."""
        return json.dumps(asdict(self), indent=2)

    @staticmethod
    def _known_kwargs(target: type, raw: Any) -> Dict[str, Any]:
        """Select from *raw* the constructor arguments that *target* accepts.

        Keys that are not fields of the dataclass *target* are dropped, so an
        extra key emitted by the LLM does not abort parsing. A JSON ``null``
        on a field that has a default is dropped as well, so the default is
        used instead of ``None``.

        Raises:
            TypeError: if *raw* is not a JSON object (dict).
        """
        from dataclasses import MISSING, fields

        if not isinstance(raw, dict):
            raise TypeError(
                f"Expected a JSON object for {target.__name__}, "
                f"got {type(raw).__name__}"
            )
        kwargs: Dict[str, Any] = {}
        for spec in fields(target):
            if spec.name not in raw:
                continue
            value = raw[spec.name]
            has_default = (spec.default is not MISSING
                           or spec.default_factory is not MISSING)
            if value is None and has_default:
                continue
            kwargs[spec.name] = value
        return kwargs

    @classmethod
    def from_json(cls, json_str: str) -> "StructuredSpecDict":
        """Build a StructuredSpecDict (with nested dataclasses) from JSON text.

        Unknown keys are ignored and ``null`` values fall back to the field
        defaults; a missing required field still raises.

        Raises:
            json.JSONDecodeError: if *json_str* is not valid JSON.
            TypeError: if an object is expected but something else is found,
                or if a required field is missing.
        """
        pick = cls._known_kwargs
        top = pick(cls, json.loads(json_str))

        top["parameters"] = [
            ParameterDef(**pick(ParameterDef, p))
            for p in top.get("parameters", [])
        ]

        # Reconstruct nested dataclasses for every sub-module.
        subs = []
        for raw_sm in top.get("sub_modules", []):
            sm = pick(SubModuleDef, raw_sm)
            sm["parameters"] = [
                ParameterDef(**pick(ParameterDef, p))
                for p in sm.get("parameters", [])
            ]
            sm["ports"] = [
                PortDef(**pick(PortDef, p)) for p in sm.get("ports", [])
            ]
            sm["fsm_states"] = [
                FSMStateDef(**pick(FSMStateDef, s))
                for s in sm.get("fsm_states", [])
            ]
            subs.append(SubModuleDef(**sm))
        top["sub_modules"] = subs

        return cls(**top)

    def validate(self) -> Tuple[bool, List[str]]:
        """Validate the SID for completeness and consistency.

        Returns:
            ``(ok, errors)`` where *ok* is True only when *errors* is empty.
        """
        errors: List[str] = []
        if not self.design_name:
            errors.append("design_name is empty")
        if not self.top_module:
            errors.append("top_module is empty")
        if not self.sub_modules:
            errors.append("No sub_modules defined")
        for sm in self.sub_modules:
            if not sm.name:
                errors.append("Sub-module has empty name")
            if not sm.ports:
                errors.append(f"Sub-module '{sm.name}' has no ports")
            if not sm.functional_logic:
                errors.append(f"Sub-module '{sm.name}' has no functional_logic")
            # A module with FSM states must expose the clock port. Only the
            # clock is checked here; the reset port is not.
            port_names = {p.name for p in sm.ports}
            if sm.fsm_states and self.clock_name not in port_names:
                errors.append(
                    f"Sub-module '{sm.name}' has FSM but no '{self.clock_name}' port"
                )
        return len(errors) == 0, errors
# ─── Decomposer Prompt Templates ─────────────────────────────────────
# System-side prompt template. The only str.format placeholder is {schema};
# the text must not contain any other curly braces.
DECOMPOSE_SYSTEM_PROMPT = """\
You are a Principal VLSI Architect performing Spec-to-RTL decomposition.
TASK: Given a natural-language chip specification, produce a COMPLETE Structured
Information Dictionary (SID) in **valid JSON format**.
The JSON MUST follow this EXACT schema:
{schema}
MANDATORY RULES:
1. Every module (including top-level) MUST appear in "sub_modules" with ALL fields populated.
2. Every sub-module MUST have at minimum: name, ports (with direction and width), functional_logic.
3. For sequential designs, clk and rst_n ports are MANDATORY.
4. FSM modules MUST list ALL states with transitions and outputs.
5. Use "parameters" for configurable widths/depths — NEVER hardcode magic numbers.
6. "functional_logic" must be a CONCISE (under 100 words) specification of the behavior. DO NOT generate Verilog skeletons in this JSON.
7. CRITICAL JSON RULES: You are generating a massive JSON object. You MUST double check your syntax. NEVER use unescaped quotes inside strings. NEVER leave trailing commas before closing braces. Ensure all objects and arrays are properly closed.
8. Limit the JSON size by omitting any unnecessary commentary, and avoiding massive unneeded string literals.
9. IF THE DESIGN IS MASSIVE (e.g. CPUs, SoCs, Superscalar systems): You MUST OMIT the `fsm_states` and `internal_signals` arrays entirely to save tokens. The Designer module will independently infer those.
"""
# User-side prompt template; str.format placeholders are {design_name} and
# {spec_text}.
DECOMPOSE_USER_PROMPT = (
    "DESIGN NAME: {design_name}\n"
    "SPECIFICATION: {spec_text}\n"
    "Produce the complete Structured Information Dictionary (JSON) for this chip design.\n"
    "Decompose into sub-modules where architecturally appropriate (e.g., separate datapath,\n"
    "controller, interface adapter). For simple designs, a single top-level module suffices.\n"
)
# ─── The Architect Module ────────────────────────────────────────────
class ArchitectModule:
    """
    Spec2RTL Decomposer Agent.

    Reads a natural-language specification and produces a StructuredSpecDict
    (JSON) that defines every sub-module, port, parameter, and FSM state
    BEFORE any Verilog is written.
    """

    # Minimal JSON schema description for the LLM prompt
    _SCHEMA_DESC = json.dumps({
        "design_name": "str",
        "chip_family": "str (counter|ALU|FIFO|FSM|UART|SPI|AXI|crypto|processor|SoC|...)",
        "description": "str",
        "top_module": "str (Verilog identifier)",
        "reset_style": "sync|async",
        "clock_name": "str",
        "reset_name": "str",
        "reset_polarity": "active_low|active_high",
        "parameters": [{"name": "str", "default": "str", "description": "str"}],
        "sub_modules": [{
            "name": "str (Verilog identifier)",
            "description": "str",
            "parameters": [{"name": "str", "default": "str", "description": "str"}],
            "ports": [{"name": "str", "direction": "input|output",
                       "width": "str", "description": "str", "reset_value": "str"}],
            "functional_logic": "CONCISE natural-language description of behavior (Max 100 words)",
            "fsm_states": [{"name": "str", "encoding": "str", "description": "str",
                            "transitions": [{"condition": "str", "next_state": "str"}],
                            "outputs": {"signal": "value"}}],
            "internal_signals": [{"name": "str", "width": "str", "purpose": "str"}],
            "instantiates": ["sub_module_name"]
        }],
        "interface_protocol": "str",
        "timing_notes": "str",
        "verification_hints": ["str"]
    }, indent=2)

    # Spec text beyond this many characters is left out of the LLM prompt.
    _MAX_SPEC_CHARS = 12000
    # Number of spec characters copied into each text field of the fallback SID.
    _FALLBACK_TEXT_CHARS = 2000

    def __init__(self, llm: LLM, verbose: bool = False, max_retries: int = 3):
        """Store the LLM handle, the verbosity flag and the retry budget."""
        self.llm = llm
        self.verbose = verbose
        self.max_retries = max_retries

    def decompose(self, design_name: str, spec_text: str,
                  save_path: Optional[str] = None) -> StructuredSpecDict:
        """
        Main entry point: decompose a natural-language spec into a StructuredSpecDict.

        Runs up to ``max_retries`` LLM attempts, feeding the previous failure
        back into the prompt. If every attempt fails, a minimal fallback SID
        is returned instead of raising.

        Args:
            design_name: Verilog-safe design name.
            spec_text: Natural language specification (or existing MAS).
            save_path: Optional path to save the JSON artifact.

        Returns:
            The SID that passed validation, or the (unvalidated) fallback SID.
        """
        logger.info(f"[Architect] Decomposing spec for '{design_name}'")
        if len(spec_text) > self._MAX_SPEC_CHARS:
            logger.warning(
                f"[Architect] Spec text truncated from {len(spec_text)} "
                f"to {self._MAX_SPEC_CHARS} characters"
            )
        system_prompt = DECOMPOSE_SYSTEM_PROMPT.format(schema=self._SCHEMA_DESC)
        user_prompt = DECOMPOSE_USER_PROMPT.format(
            design_name=design_name,
            spec_text=spec_text[:self._MAX_SPEC_CHARS],  # Truncate to fit context
        )

        sid = None
        last_error = ""
        for attempt in range(1, self.max_retries + 1):
            logger.info(f"[Architect] Decompose attempt {attempt}/{self.max_retries}")

            # Tell the model what went wrong on the previous attempt, if any.
            retry_context = ""
            if last_error:
                retry_context = (
                    f"\n\nPREVIOUS ATTEMPT FAILED WITH:\n{last_error}\n"
                    "Fix the issues and return a corrected JSON. Ensure there are no trailing commas and double quotes are escaped."
                )

            # A fresh CrewAI agent and task are built for every attempt.
            agent = Agent(
                role="Spec2RTL Decomposer",
                goal=f"Produce a complete Structured Information Dictionary for {design_name}",
                backstory=(
                    "You are a world-class VLSI architect who converts natural-language "
                    "chip specifications into precise, machine-readable JSON contracts. "
                    "You never leave fields empty or use placeholders."
                ),
                llm=self.llm,
                verbose=self.verbose,
            )
            task = Task(
                description=system_prompt + "\n\n" + user_prompt + retry_context,
                expected_output="Valid JSON matching the Structured Information Dictionary schema",
                agent=agent,
            )

            try:
                raw = str(Crew(agents=[agent], tasks=[task]).kickoff())
                candidate = self._parse_response(raw, design_name)
                ok, errs = candidate.validate()
            except Exception as e:
                # Boundary handler: any LLM/parse failure becomes retry feedback.
                last_error = f"Parse/execution error: {str(e)}"
                logger.warning(f"[Architect] Attempt {attempt} failed: {e}")
                continue

            if not ok:
                last_error = "Validation errors:\n" + "\n".join(f" - {e}" for e in errs)
                logger.warning(f"[Architect] Validation failed: {errs}")
                continue

            sid = candidate
            logger.info(f"[Architect] Successfully decomposed into "
                        f"{len(sid.sub_modules)} sub-modules")
            break

        if sid is None:
            # Fallback: create a minimal SID from the spec text
            logger.warning("[Architect] All attempts failed — generating fallback SID")
            sid = self._fallback_sid(design_name, spec_text)

        # Persist artifact
        if save_path:
            self._write_sid(sid, save_path)
            logger.info(f"[Architect] SID saved to {save_path}")
        return sid

    @staticmethod
    def _write_sid(sid: Any, save_path: str) -> None:
        """Write ``sid.to_json()`` to *save_path*, creating parent directories."""
        parent = os.path.dirname(save_path)
        # A bare filename has no directory part; os.makedirs("") would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(sid.to_json())

    def _parse_response(self, raw: str, design_name: str) -> StructuredSpecDict:
        """Extract the SID JSON object from an LLM response.

        The response may wrap the JSON in markdown fences or surround it with
        prose. Missing ``design_name`` / ``top_module`` values are filled in
        with *design_name*.

        Raises:
            json.JSONDecodeError: if no valid JSON can be parsed.
            ValueError: if the parsed JSON is not an object.
        """
        text = raw.strip()

        # Strip markdown fences
        fenced = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
        if fenced:
            text = fenced.group(1).strip()

        # Try to find the outermost JSON object
        start = text.find('{')
        end = text.rfind('}')
        if start >= 0 and end > start:
            text = text[start:end + 1]

        data = json.loads(text)
        if not isinstance(data, dict):
            # Clear message: this text is fed back to the LLM as retry context.
            raise ValueError(
                f"Expected a JSON object for the SID, got {type(data).__name__}"
            )

        # Ensure design_name is set
        if not data.get("design_name"):
            data["design_name"] = design_name
        if not data.get("top_module"):
            data["top_module"] = design_name
        return StructuredSpecDict.from_json(json.dumps(data))

    def _fallback_sid(self, design_name: str, spec_text: str) -> StructuredSpecDict:
        """Generate a minimal SID when LLM decomposition fails.

        The result has a single module named *design_name* with only ``clk``
        and ``rst_n`` ports; the spec text (truncated) is reused as the
        description and functional logic.
        """
        excerpt = spec_text[:self._FALLBACK_TEXT_CHARS]
        return StructuredSpecDict(
            design_name=design_name,
            chip_family="unknown",
            description=excerpt,
            top_module=design_name,
            reset_style="sync",
            parameters=[],
            sub_modules=[
                SubModuleDef(
                    name=design_name,
                    description=excerpt,
                    ports=[
                        PortDef(name="clk", direction="input", width="1", description="System clock"),
                        PortDef(name="rst_n", direction="input", width="1", description="Active-low reset"),
                    ],
                    functional_logic=excerpt,
                )
            ],
            verification_hints=["Requires manual specification review — auto-decomposition failed"],
        )

    def enrich_with_pdf(self, pdf_path: str) -> str:
        """
        Extract text from a PDF specification document.

        Runs the external ``pdftotext`` tool when it is available. If the tool
        is missing, cannot be started, times out, fails, or produces no text,
        the file is read as plain UTF-8 text instead.

        Returns:
            The extracted text, or "" if the file cannot be read at all.
        """
        import subprocess  # local import: only this method shells out

        try:
            result = subprocess.run(
                ["pdftotext", "-layout", pdf_path, "-"],
                capture_output=True,
                # pdftotext writes UTF-8 by default; do not rely on the locale.
                encoding="utf-8", errors="replace",
                timeout=30,
            )
        except (OSError, subprocess.TimeoutExpired):
            # Tool missing, not executable, or too slow: use the fallback below.
            pass
        else:
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout

        # Fallback: try reading as plain text
        try:
            with open(pdf_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        except OSError:
            return ""

    def sid_to_rtl_prompt(self, sid: StructuredSpecDict) -> str:
        """
        Convert a SID into a detailed RTL generation prompt.

        This is what gets fed to the Coder agent — it's a precise,
        unambiguous specification derived from the JSON contract.
        """
        sections = [
            f"# RTL Specification for {sid.top_module}",
            f"Chip Family: {sid.chip_family}",
            f"Description: {sid.description}",
            f"Reset: {sid.reset_style} ({sid.reset_polarity})",
            f"Interface: {sid.interface_protocol or 'custom'}",
        ]

        if sid.parameters:
            sections.append("\n## Global Parameters")
            for p in sid.parameters:
                sections.append(f" parameter {p.name} = {p.default} // {p.description}")

        for sm in sid.sub_modules:
            sections.append(f"\n## Module: {sm.name}")
            sections.append(f" Description: {sm.description}")
            if sm.parameters:
                sections.append(" Parameters:")
                for p in sm.parameters:
                    sections.append(f" parameter {p.name} = {p.default} // {p.description}")
            sections.append(" Ports:")
            for p in sm.ports:
                rv = f" (reset: {p.reset_value})" if p.reset_value else ""
                sections.append(f" {p.direction} [{p.width}] {p.name} — {p.description}{rv}")
            sections.append(f" Functional Logic:\n {sm.functional_logic}")
            if sm.fsm_states:
                sections.append(" FSM States:")
                for s in sm.fsm_states:
                    sections.append(f" {s.name}: {s.description}")
                    for t in s.transitions:
                        sections.append(f" → {t.get('next_state')} when {t.get('condition')}")
            if sm.instantiates:
                sections.append(f" Instantiates: {', '.join(sm.instantiates)}")

        if sid.verification_hints:
            sections.append("\n## Verification Hints")
            for h in sid.verification_hints:
                sections.append(f" - {h}")
        return "\n".join(sections)