File size: 17,586 Bytes
4056e80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6b47db
 
 
 
4056e80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ed3642
4056e80
f6b47db
4056e80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6b47db
4056e80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
"""
Architect Module — Spec2RTL Decomposer Agent
=============================================

Based on: Spec2RTL-Agent (arXiv:2405.xxxxx)

Before writing any Verilog, this module reads the input specification (text/PDF)
and produces a Structured Information Dictionary (SID) in JSON format.

The SID explicitly defines:
  - Top-level module name, parameters, ports
  - Sub-module names, inputs, outputs, and functional logic
  - FSM state maps, datapath descriptions, timing constraints
  - Interface protocols and reset strategy

This JSON contract becomes the SINGLE SOURCE OF TRUTH for all downstream agents
(Coder, Verifier, Debugger) — eliminating ambiguity and hallucination.
"""

import json
import re
import logging
import os
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional, Tuple
from crewai import Agent, Task, Crew, LLM

logger = logging.getLogger(__name__)


# ─── Structured Information Dictionary Schema ────────────────────────

@dataclass
class PortDef:
    """One port on a module interface.

    ``name``, ``direction`` and ``width`` are required; ``description`` and
    ``reset_value`` default to the empty string.
    """

    name: str
    # One of "input" | "output" | "inout".
    direction: str
    # Width kept as text so it can be a literal or a parameter name,
    # e.g. "8", "DATA_WIDTH", "1".
    width: str
    description: str = ""
    # Only for output registers.
    reset_value: str = ""


@dataclass
class ParameterDef:
    """One configurable parameter slot.

    ``name`` and ``default`` are required strings; ``description`` is
    optional and defaults to the empty string.
    """

    name: str
    # Default value, stored as text.
    default: str
    description: str = ""


@dataclass
class FSMStateDef:
    """One state of a finite-state machine.

    Only ``name`` is required. ``transitions`` is a list of string-to-string
    dicts and ``outputs`` is a string-to-string dict; both start empty.
    """

    name: str
    encoding: str = ""
    description: str = ""
    # default_factory gives every instance its own list / dict.
    transitions: List[Dict[str, str]] = field(default_factory=list)
    outputs: Dict[str, str] = field(default_factory=dict)


@dataclass
class SubModuleDef:
    """Definition of one module; the top-level module is described this way too.

    Only ``name`` is required. String fields default to the empty string and
    every list field defaults to a fresh empty list.
    """

    name: str
    description: str = ""
    parameters: List[ParameterDef] = field(default_factory=list)
    ports: List[PortDef] = field(default_factory=list)
    # Natural-language description of the behaviour.
    functional_logic: str = ""
    # Optional Verilog skeleton.
    rtl_skeleton: str = ""
    fsm_states: List[FSMStateDef] = field(default_factory=list)
    internal_signals: List[Dict[str, str]] = field(default_factory=list)
    # Names of the sub-modules this module instantiates.
    instantiates: List[str] = field(default_factory=list)


@dataclass
class StructuredSpecDict:
    """
    Complete Structured Information Dictionary (SID) for a chip design.

    This is the JSON contract between the Architect → Coder → Verifier pipeline.
    ``to_json`` / ``from_json`` convert between this object and its JSON text,
    and ``validate`` reports missing or inconsistent entries.
    """
    design_name: str
    chip_family: str            # e.g. "counter", "FIFO", "UART", "AES", "RISC-V"
    description: str
    top_module: str
    reset_style: str = "sync"   # "sync" | "async"
    clock_name: str = "clk"
    reset_name: str = "rst_n"
    reset_polarity: str = "active_low"
    parameters: List[ParameterDef] = field(default_factory=list)
    sub_modules: List[SubModuleDef] = field(default_factory=list)
    interface_protocol: str = ""     # "AXI4-Stream" | "APB" | "wishbone" | "custom"
    timing_notes: str = ""
    verification_hints: List[str] = field(default_factory=list)  # Hints for TB agent

    def to_json(self) -> str:
        """Return the SID, nested dataclasses included, as 2-space-indented JSON."""
        return json.dumps(asdict(self), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "StructuredSpecDict":
        """
        Rebuild a StructuredSpecDict (and its nested dataclasses) from JSON text.

        The loader is deliberately tolerant of the shapes LLMs tend to emit:
          - keys that are not dataclass fields are ignored;
          - ``null`` for a field that has a default falls back to that default;
          - ``null`` or a missing key for a nested list is treated as empty.

        Args:
            json_str: JSON text whose top-level value is an object.

        Returns:
            The reconstructed StructuredSpecDict.

        Raises:
            json.JSONDecodeError: ``json_str`` is not valid JSON.
            TypeError: the top-level value or a nested entry is not a JSON
                object, or a required field is missing.
        """
        # Local import: the module header only pulls in dataclass/field/asdict.
        from dataclasses import MISSING, fields

        def build(dc_type, raw, **nested):
            """Instantiate dc_type from raw, using `nested` for pre-built fields."""
            if not isinstance(raw, dict):
                raise TypeError(
                    f"{dc_type.__name__} entry must be a JSON object, "
                    f"got {type(raw).__name__}"
                )
            kwargs = dict(nested)
            for spec in fields(dc_type):
                if spec.name in nested or spec.name not in raw:
                    continue
                value = raw[spec.name]
                has_default = (spec.default is not MISSING
                               or spec.default_factory is not MISSING)
                # An explicit null on an optional field means "not provided".
                if value is None and has_default:
                    continue
                kwargs[spec.name] = value
            return dc_type(**kwargs)

        def entries(container, key):
            """Return container[key], treating a missing key or null as empty."""
            return container.get(key) or []

        data = json.loads(json_str)
        if not isinstance(data, dict):
            raise TypeError(
                f"SID JSON must be an object, got {type(data).__name__}"
            )

        params = [build(ParameterDef, p) for p in entries(data, "parameters")]
        subs = []
        for sm in entries(data, "sub_modules"):
            if not isinstance(sm, dict):
                raise TypeError(
                    f"SubModuleDef entry must be a JSON object, got {type(sm).__name__}"
                )
            subs.append(build(
                SubModuleDef, sm,
                parameters=[build(ParameterDef, p) for p in entries(sm, "parameters")],
                ports=[build(PortDef, p) for p in entries(sm, "ports")],
                fsm_states=[build(FSMStateDef, s) for s in entries(sm, "fsm_states")],
            ))
        return build(cls, data, parameters=params, sub_modules=subs)

    def validate(self) -> Tuple[bool, List[str]]:
        """
        Validate the SID for completeness and consistency.

        Returns:
            ``(ok, errors)`` where ``errors`` lists every problem found and
            ``ok`` is True only when that list is empty.
        """
        errors: List[str] = []
        if not self.design_name:
            errors.append("design_name is empty")
        if not self.top_module:
            errors.append("top_module is empty")
        if not self.sub_modules:
            errors.append("No sub_modules defined")
        for sm in self.sub_modules:
            if not sm.name:
                errors.append("Sub-module has empty name")
            if not sm.ports:
                errors.append(f"Sub-module '{sm.name}' has no ports")
            if not sm.functional_logic:
                errors.append(f"Sub-module '{sm.name}' has no functional_logic")
            # A module that declares FSM states is sequential, so it must
            # expose the design's clock port.
            port_names = {p.name for p in sm.ports}
            if sm.fsm_states and self.clock_name not in port_names:
                errors.append(f"Sub-module '{sm.name}' has FSM but no '{self.clock_name}' port")
        return len(errors) == 0, errors


# ─── Decomposer Prompt Templates ─────────────────────────────────────

# System-side instructions for the decomposer LLM.
# Contains exactly one str.format placeholder, {schema}, which receives the
# JSON schema description. Any other literal brace added here would have to be
# doubled to survive .format().
DECOMPOSE_SYSTEM_PROMPT = """\
You are a Principal VLSI Architect performing Spec-to-RTL decomposition.

TASK: Given a natural-language chip specification, produce a COMPLETE Structured 
Information Dictionary (SID) in **valid JSON format**.

The JSON MUST follow this EXACT schema:
{schema}

MANDATORY RULES:
1. Every module (including top-level) MUST appear in "sub_modules" with ALL fields populated.
2. Every sub-module MUST have at minimum: name, ports (with direction and width), functional_logic.
3. For sequential designs, clk and rst_n ports are MANDATORY.
4. FSM modules MUST list ALL states with transitions and outputs.
5. Use "parameters" for configurable widths/depths — NEVER hardcode magic numbers.
6. "functional_logic" must be a CONCISE (under 100 words) specification of the behavior. DO NOT generate Verilog skeletons in this JSON.
7. CRITICAL JSON RULES: You are generating a massive JSON object. You MUST double check your syntax. NEVER use unescaped quotes inside strings. NEVER leave trailing commas before closing braces. Ensure all objects and arrays are properly closed.
8. Limit the JSON size by omitting any unnecessary commentary, and avoiding massive unneeded string literals.
9. IF THE DESIGN IS MASSIVE (e.g. CPUs, SoCs, Superscalar systems): You MUST OMIT the `fsm_states` and `internal_signals` arrays entirely to save tokens. The Designer module will independently infer those.
"""

# Per-request instructions.
# str.format placeholders: {design_name} and {spec_text}.
DECOMPOSE_USER_PROMPT = """\
DESIGN NAME: {design_name}
SPECIFICATION: {spec_text}

Produce the complete Structured Information Dictionary (JSON) for this chip design.
Decompose into sub-modules where architecturally appropriate (e.g., separate datapath,
controller, interface adapter). For simple designs, a single top-level module suffices.
"""


# ─── The Architect Module ────────────────────────────────────────────

class ArchitectModule:
    """
    Spec2RTL Decomposer Agent.

    Reads a natural language specification and produces a StructuredSpecDict
    (JSON) that defines every sub-module, port, parameter, and FSM state
    BEFORE any Verilog is written.

    Attributes:
        llm:         LLM handle handed to every CrewAI Agent this class builds.
        verbose:     Forwarded to the Agent's ``verbose`` argument.
        max_retries: Number of LLM attempts made by ``decompose`` before it
                     falls back to a minimal SID.
    """

    # Maximum number of specification characters embedded in the LLM prompt.
    _MAX_SPEC_CHARS = 12000

    # Minimal JSON schema description for the LLM prompt
    _SCHEMA_DESC = json.dumps({
        "design_name": "str",
        "chip_family": "str (counter|ALU|FIFO|FSM|UART|SPI|AXI|crypto|processor|SoC|...)",
        "description": "str",
        "top_module": "str (Verilog identifier)",
        "reset_style": "sync|async",
        "clock_name": "str",
        "reset_name": "str",
        "reset_polarity": "active_low|active_high",
        "parameters": [{"name": "str", "default": "str", "description": "str"}],
        "sub_modules": [{
            "name": "str (Verilog identifier)",
            "description": "str",
            "parameters": [{"name": "str", "default": "str", "description": "str"}],
            "ports": [{"name": "str", "direction": "input|output",
                        "width": "str", "description": "str", "reset_value": "str"}],
            "functional_logic": "CONCISE natural-language description of behavior (Max 100 words)",
            "fsm_states": [{"name": "str", "encoding": "str", "description": "str",
                            "transitions": [{"condition": "str", "next_state": "str"}],
                            "outputs": {"signal": "value"}}],
            "internal_signals": [{"name": "str", "width": "str", "purpose": "str"}],
            "instantiates": ["sub_module_name"]
        }],
        "interface_protocol": "str",
        "timing_notes": "str",
        "verification_hints": ["str"]
    }, indent=2)

    def __init__(self, llm: LLM, verbose: bool = False, max_retries: int = 3):
        """Store the LLM handle, the verbosity flag and the retry budget."""
        self.llm = llm
        self.verbose = verbose
        self.max_retries = max_retries

    def decompose(self, design_name: str, spec_text: str,
                  save_path: Optional[str] = None) -> StructuredSpecDict:
        """
        Main entry point: decompose a natural-language spec into a StructuredSpecDict.

        Each attempt runs a one-agent, one-task CrewAI crew, parses the reply
        and validates it. The error from a failed attempt is appended to the
        next attempt's prompt. If no attempt yields a valid SID, a minimal
        fallback SID is returned instead (the fallback is not validated).

        Args:
            design_name: Verilog-safe design name.
            spec_text:   Natural language specification (or existing MAS).
                         Only the first 12000 characters are sent to the LLM.
            save_path:   Optional path to save the JSON artifact.

        Returns:
            A StructuredSpecDict that passed ``validate()``, or the fallback SID.

        Raises:
            OSError: ``save_path`` was given but could not be written.
        """
        logger.info("[Architect] Decomposing spec for '%s'", design_name)

        if len(spec_text) > self._MAX_SPEC_CHARS:
            logger.warning(
                "[Architect] Spec is %d chars; only the first %d are sent to the LLM",
                len(spec_text), self._MAX_SPEC_CHARS,
            )

        system_prompt = DECOMPOSE_SYSTEM_PROMPT.format(schema=self._SCHEMA_DESC)
        user_prompt = DECOMPOSE_USER_PROMPT.format(
            design_name=design_name,
            spec_text=spec_text[:self._MAX_SPEC_CHARS],  # Truncate to fit context
        )

        sid = None
        last_error = ""

        for attempt in range(1, self.max_retries + 1):
            logger.info("[Architect] Decompose attempt %d/%d", attempt, self.max_retries)

            # Feed the previous failure back so the model can correct itself.
            retry_context = ""
            if last_error:
                retry_context = (
                    f"\n\nPREVIOUS ATTEMPT FAILED WITH:\n{last_error}\n"
                    "Fix the issues and return a corrected JSON. Ensure there are no trailing commas and double quotes are escaped."
                )

            # Build the CrewAI agent for this attempt
            agent = Agent(
                role="Spec2RTL Decomposer",
                goal=f"Produce a complete Structured Information Dictionary for {design_name}",
                backstory=(
                    "You are a world-class VLSI architect who converts natural-language "
                    "chip specifications into precise, machine-readable JSON contracts. "
                    "You never leave fields empty or use placeholders."
                ),
                llm=self.llm,
                verbose=self.verbose,
            )

            task = Task(
                description=system_prompt + "\n\n" + user_prompt + retry_context,
                expected_output="Valid JSON matching the Structured Information Dictionary schema",
                agent=agent,
            )

            try:
                raw = str(Crew(agents=[agent], tasks=[task]).kickoff())
                # Keep the result in a per-attempt variable: it only becomes
                # `sid` after validation, so an attempt that parses but then
                # blows up inside validate() can never be returned as valid.
                candidate = self._parse_response(raw, design_name)
                ok, errs = candidate.validate()
            except Exception as e:
                # Boundary around LLM I/O and parsing: any failure means "retry".
                last_error = f"Parse/execution error: {e}"
                logger.warning("[Architect] Attempt %d failed: %s", attempt, e)
                continue

            if not ok:
                last_error = "Validation errors:\n" + "\n".join(f"  - {e}" for e in errs)
                logger.warning("[Architect] Validation failed: %s", errs)
                continue

            sid = candidate
            logger.info("[Architect] Successfully decomposed into %d sub-modules",
                        len(sid.sub_modules))
            break

        if sid is None:
            # Fallback: create a minimal SID from the spec text
            logger.warning("[Architect] All attempts failed — generating fallback SID")
            sid = self._fallback_sid(design_name, spec_text)

        # Persist artifact
        if save_path:
            self._write_artifact(sid, save_path)
            logger.info("[Architect] SID saved to %s", save_path)

        return sid

    @staticmethod
    def _write_artifact(sid: Any, save_path: str) -> None:
        """Write ``sid.to_json()`` to ``save_path`` as UTF-8, creating parent dirs."""
        parent = os.path.dirname(save_path)
        # A bare filename has no parent component; os.makedirs("") would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(save_path, "w", encoding="utf-8") as f:
            f.write(sid.to_json())

    @staticmethod
    def _extract_json_object(raw: str) -> Dict[str, Any]:
        """
        Pull the JSON object out of an LLM reply.

        The reply may wrap the JSON in a markdown fence and may carry prose
        before or after it.

        Raises:
            json.JSONDecodeError: no parseable JSON was found.
            ValueError: the JSON parsed, but is not an object.
        """
        text = raw.strip()

        # Strip markdown fences
        fenced = re.search(r'```(?:json)?\s*([\s\S]*?)```', text)
        if fenced:
            text = fenced.group(1).strip()

        start = text.find('{')
        if start >= 0:
            # raw_decode stops at the end of the first complete value, so
            # trailing commentary (even commentary containing braces) is ignored.
            data, _ = json.JSONDecoder().raw_decode(text, start)
        else:
            data = json.loads(text)

        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a JSON object at the top level, got {type(data).__name__}"
            )
        return data

    def _parse_response(self, raw: str, design_name: str) -> StructuredSpecDict:
        """
        Extract JSON from LLM response (may contain markdown fences).

        ``design_name`` is used for both ``design_name`` and ``top_module``
        when the reply leaves them missing or empty.
        """
        data = self._extract_json_object(raw)

        # Ensure design_name is set
        if not data.get("design_name"):
            data["design_name"] = design_name
        if not data.get("top_module"):
            data["top_module"] = design_name

        return StructuredSpecDict.from_json(json.dumps(data))

    def _fallback_sid(self, design_name: str, spec_text: str) -> StructuredSpecDict:
        """
        Generate a minimal SID when LLM decomposition fails.

        The result has a single module named ``design_name`` with only ``clk``
        and ``rst_n`` ports; the first 2000 characters of the spec are reused
        as description and functional logic.
        """
        excerpt = spec_text[:2000]
        return StructuredSpecDict(
            design_name=design_name,
            chip_family="unknown",
            description=excerpt,
            top_module=design_name,
            reset_style="sync",
            parameters=[],
            sub_modules=[
                SubModuleDef(
                    name=design_name,
                    description=excerpt,
                    ports=[
                        PortDef(name="clk", direction="input", width="1", description="System clock"),
                        PortDef(name="rst_n", direction="input", width="1", description="Active-low reset"),
                    ],
                    functional_logic=excerpt,
                )
            ],
            verification_hints=["Requires manual specification review — auto-decomposition failed"],
        )

    def enrich_with_pdf(self, pdf_path: str) -> str:
        """
        Extract text from a PDF specification document.

        Runs the external ``pdftotext -layout`` tool (30 s timeout). If the
        tool is missing, fails, times out or yields no text, the file is read
        as plain text instead, with undecodable bytes dropped.

        Args:
            pdf_path: Path of the document to read.

        Returns:
            The extracted text, or "" if the file cannot be read at all.
        """
        import subprocess

        try:
            result = subprocess.run(
                ["pdftotext", "-layout", pdf_path, "-"],
                capture_output=True, timeout=30,
                # Decode explicitly rather than via the locale; "replace"
                # keeps a stray byte from aborting the whole extraction.
                encoding="utf-8", errors="replace",
            )
        except (OSError, subprocess.SubprocessError) as exc:
            # OSError covers a missing or non-executable binary;
            # SubprocessError covers the timeout.
            logger.debug("[Architect] pdftotext unavailable for %s: %s", pdf_path, exc)
        else:
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout

        # Fallback: try reading as plain text
        try:
            with open(pdf_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        except OSError as exc:
            logger.warning("[Architect] Could not read %s: %s", pdf_path, exc)
            return ""

    def sid_to_rtl_prompt(self, sid: StructuredSpecDict) -> str:
        """
        Convert a SID into a detailed RTL generation prompt.

        This is what gets fed to the Coder agent — a plain-text rendering of
        the JSON contract: design header, clock/reset signal names, global
        parameters, then per module its parameters, ports, functional logic,
        FSM states, internal signals and instantiated sub-modules, followed
        by the verification hints. Empty optional sections are omitted.

        Args:
            sid: The Structured Information Dictionary to render.

        Returns:
            The prompt as a newline-joined string.
        """
        sections = [
            f"# RTL Specification for {sid.top_module}",
            f"Chip Family: {sid.chip_family}",
            f"Description: {sid.description}",
            f"Reset: {sid.reset_style} ({sid.reset_polarity})",
            f"Interface: {sid.interface_protocol or 'custom'}",
            # Name the actual signals so the generated RTL matches the ports
            # that validate() checks for.
            f"Clock Signal: {sid.clock_name}",
            f"Reset Signal: {sid.reset_name}",
        ]
        if sid.timing_notes:
            sections.append(f"Timing Notes: {sid.timing_notes}")

        if sid.parameters:
            sections.append("\n## Global Parameters")
            for p in sid.parameters:
                sections.append(f"  parameter {p.name} = {p.default}  // {p.description}")

        for sm in sid.sub_modules:
            sections.append(f"\n## Module: {sm.name}")
            sections.append(f"  Description: {sm.description}")

            if sm.parameters:
                sections.append("  Parameters:")
                for p in sm.parameters:
                    sections.append(f"    parameter {p.name} = {p.default}  // {p.description}")

            sections.append("  Ports:")
            for p in sm.ports:
                rv = f" (reset: {p.reset_value})" if p.reset_value else ""
                sections.append(f"    {p.direction} [{p.width}] {p.name} — {p.description}{rv}")

            sections.append(f"  Functional Logic:\n    {sm.functional_logic}")

            if sm.fsm_states:
                sections.append("  FSM States:")
                for s in sm.fsm_states:
                    sections.append(f"    {s.name}: {s.description}")
                    if s.encoding:
                        sections.append(f"      encoding: {s.encoding}")
                    if s.outputs:
                        rendered = ", ".join(f"{sig}={val}" for sig, val in s.outputs.items())
                        sections.append(f"      outputs: {rendered}")
                    for t in s.transitions:
                        sections.append(f"      → {t.get('next_state')} when {t.get('condition')}")

            if sm.internal_signals:
                sections.append("  Internal Signals:")
                for sig in sm.internal_signals:
                    sections.append(
                        f"    [{sig.get('width', '1')}] {sig.get('name')} — {sig.get('purpose', '')}"
                    )

            if sm.instantiates:
                sections.append(f"  Instantiates: {', '.join(sm.instantiates)}")

        if sid.verification_hints:
            sections.append("\n## Verification Hints")
            for h in sid.verification_hints:
                sections.append(f"  - {h}")

        return "\n".join(sections)