Spaces:

skumar889
/

semiconductor-pipeline

Running

Sai Kumar Taraka commited on 13 days ago

Commit

8cd3050

1 Parent(s): 04ea8a9

feat: Advanced ML V2 Model with Reinforcement Learning

## NEW FEATURES:
- Advanced Reinforcement Learning with 4 exploration strategies:
* UCB1 (Upper Confidence Bound) - default, best for exploration/exploitation
* Softmax (Boltzmann) - probabilistic based on Q-values
* Epsilon-Greedy - simple with decaying randomness
* Thompson Sampling - Bayesian approach with Beta distributions
- Experience Replay Buffer (10,000 capacity) for stable learning
- Eligibility Traces for better credit assignment in sequential tasks
- Context-Aware Pattern Learning with N-grams and Association Rules
- Deep UVM Compliance Validation:
* Factory registration checking
* Phase implementation verification
* TLM connection completeness
* Signal-direction matching
* Register field width/access validation
* Coverage model completeness checking

## BUG FIXES:
- Fixed templates to only use YAML-declared signals (removed extra modem signals: dsr_n, ri_n, dcd_n)
- Fixed sequence.sv.j2: Removed invalid 'foreach (tx_data.size())' loop
- Fixed uart_rx_seq extending wrong class (now extends base_seq)

## UI ENHANCEMENTS:
- New ML Insights tab showing:
* Learning statistics
* RL metrics (episodes, Q-values, updates)
* Pattern analysis
* Strategy distribution
- Model selection: V2 (Recommended) option
- Exploration strategy selection (UCB, Softmax, Epsilon-Greedy, Thompson)
- Learning state import/export

## FILES ADDED:
- src/models/advanced_rl_learner.py - Advanced RL with experience replay
- src/models/advanced_pattern_learner.py - Context-aware pattern mining
- src/models/advanced_code_validator.py - Deep UVM compliance validator
- src/models/enhanced_ml_model_v2.py - Integrated V2 ML model
- tests/quick_v2_test.py - Quick smoke test
- tests/test_advanced_ml_v2.py - Comprehensive test suite

## FILES MODIFIED:
- src/config.py - Added V2 model config fields
- src/pipeline.py - V2 model selection and integration
- streamlit_app.py - UI enhancements for V2 model
- src/generation/templates/*.j2 - Bug fixes and signal alignments

Files changed (14) hide show

src/config.py +3 -1
src/generation/templates/interface.sv.j2 +4 -4
src/generation/templates/rtl/protocol_core.v.j2 +4 -5
src/generation/templates/sequence.sv.j2 +2 -2
src/generation/templates/test.sv.j2 +0 -3
src/generation/templates/testbench.sv.j2 +13 -19
src/models/advanced_code_validator.py +1294 -0
src/models/advanced_pattern_learner.py +926 -0
src/models/advanced_rl_learner.py +728 -0
src/models/enhanced_ml_model_v2.py +801 -0
src/pipeline.py +30 -13
streamlit_app.py +455 -152
tests/quick_v2_test.py +130 -0
tests/test_advanced_ml_v2.py +477 -0

src/config.py CHANGED Viewed

@@ -88,7 +88,7 @@ class AutoTrainConfig(BaseModel):
 class MLConfig(BaseModel):
     """Configuration for AI/ML-augmented generation with actual learning capabilities."""
     enabled: bool = False
-    model_type: str = Field(default="template", pattern=r"^(template|ml|hybrid|llm|semantic)$")
     similarity_threshold: float = Field(default=0.75, ge=0.0, le=1.0)
     auto_learn: bool = True
     index_path: Optional[str] = None
@@ -109,6 +109,8 @@ class MLConfig(BaseModel):
     learning_rate: float = Field(default=0.1, ge=0.001, le=1.0)
     reinforcement_discount: float = Field(default=0.9, ge=0.0, le=1.0)
     exploration_epsilon: float = Field(default=0.05, ge=0.0, le=0.5)
 class PipelineConfig(BaseModel):

 class MLConfig(BaseModel):
     """Configuration for AI/ML-augmented generation with actual learning capabilities."""
     enabled: bool = False
+    model_type: str = Field(default="template", pattern=r"^(template|ml|hybrid|llm|semantic|v2)$")
     similarity_threshold: float = Field(default=0.75, ge=0.0, le=1.0)
     auto_learn: bool = True
     index_path: Optional[str] = None
     learning_rate: float = Field(default=0.1, ge=0.001, le=1.0)
     reinforcement_discount: float = Field(default=0.9, ge=0.0, le=1.0)
     exploration_epsilon: float = Field(default=0.05, ge=0.0, le=0.5)
+    exploration_strategy: str = Field(default="ucb", pattern=r"^(epsilon_greedy|softmax|ucb|thompson)$")
+    strict_validation: bool = False
 class PipelineConfig(BaseModel):

src/generation/templates/interface.sv.j2 CHANGED Viewed

@@ -10,16 +10,16 @@ interface {{ spec.design_name }}_intf (input logic clk, input logic rst_n);
   logic       wb_ack;
   // Serial
   logic       uart_tx, uart_rx;
-  // Modem
-  logic       cts_n, rts_n, dsr_n, dtr_n, ri_n, dcd_n, out1_n, out2_n;
   // Interrupt
   logic       uart_intr;
   clocking drv_cb @(posedge clk);
     default input #1ns output #1ns;
     output wb_cyc, wb_stb, wb_we, wb_addr, wb_data_o;
-    output uart_rx, cts_n, dsr_n, ri_n, dcd_n;
-    input  wb_ack, wb_data_i, uart_tx, uart_intr, rts_n, dtr_n, out1_n, out2_n;
   endclocking
 {% elif p == "spi" %}

   logic       wb_ack;
   // Serial
   logic       uart_tx, uart_rx;
+  // Modem (as per spec)
+  logic       cts_n, rts_n;
   // Interrupt
   logic       uart_intr;
   clocking drv_cb @(posedge clk);
     default input #1ns output #1ns;
     output wb_cyc, wb_stb, wb_we, wb_addr, wb_data_o;
+    output uart_rx, cts_n;
+    input  wb_ack, wb_data_i, uart_tx, uart_intr, rts_n;
   endclocking
 {% elif p == "spi" %}

src/generation/templates/rtl/protocol_core.v.j2 CHANGED Viewed

@@ -16,9 +16,9 @@ module {{ spec.design_name }}_core (
   // Serial
   output logic       uart_tx,
   input  logic       uart_rx,
-  // Modem
-  input  logic       cts_n, dsr_n, ri_n, dcd_n,
-  output logic       rts_n, dtr_n, out1_n, out2_n,
   output logic       uart_intr
 );
   logic [7:0] reg_lcr, reg_scr;
@@ -33,8 +33,7 @@ module {{ spec.design_name }}_core (
         3'h7: reg_scr <= wb_data_i;
       endcase
   assign uart_tx = uart_rx;
-  assign rts_n = 0; assign dtr_n = 0;
-  assign out1_n = 1; assign out2_n = 1;
   assign uart_intr = 0;
 {% elif p == "spi" %}
   // Wishbone bus

   // Serial
   output logic       uart_tx,
   input  logic       uart_rx,
+  // Modem (as per spec)
+  input  logic       cts_n,
+  output logic       rts_n,
   output logic       uart_intr
 );
   logic [7:0] reg_lcr, reg_scr;
         3'h7: reg_scr <= wb_data_i;
       endcase
   assign uart_tx = uart_rx;
+  assign rts_n = 0;
   assign uart_intr = 0;
 {% elif p == "spi" %}
   // Wishbone bus

src/generation/templates/sequence.sv.j2 CHANGED Viewed

@@ -185,7 +185,7 @@ class uart_tx_seq extends {{ spec.design_name }}_base_seq;
     `uvm_info("UART_TX", $sformatf("Transmitting %0d bytes: %p", num_bytes, tx_data), UVM_MEDIUM)
-    foreach (tx_data.size()) begin
       wait_for_tx_empty();
       if (reg_model) begin
@@ -221,7 +221,7 @@ class uart_tx_seq extends {{ spec.design_name }}_base_seq;
   endtask
 endclass
-class uart_rx_seq extends {{ spec.design_name }}_seq_item);
   `uvm_object_utils(uart_rx_seq)
   logic [7:0] rx_data[$];

     `uvm_info("UART_TX", $sformatf("Transmitting %0d bytes: %p", num_bytes, tx_data), UVM_MEDIUM)
+    for (int i = 0; i < num_bytes; i++) begin
       wait_for_tx_empty();
       if (reg_model) begin
   endtask
 endclass
+class uart_rx_seq extends {{ spec.design_name }}_base_seq;
   `uvm_object_utils(uart_rx_seq)
   logic [7:0] rx_data[$];

src/generation/templates/test.sv.j2 CHANGED Viewed

@@ -57,9 +57,6 @@ class {{ spec.design_name }}_base_test extends uvm_test;
     `uvm_info("TEST", "Starting test...", UVM_LOW)
     vif.uart_rx <= 1'b1;
     vif.cts_n <= 1'b0;
-    vif.dsr_n <= 1'b0;
-    vif.ri_n <= 1'b1;
-    vif.dcd_n <= 1'b1;
     run_top_sequence();
     phase.drop_objection(this);
   endtask

     `uvm_info("TEST", "Starting test...", UVM_LOW)
     vif.uart_rx <= 1'b1;
     vif.cts_n <= 1'b0;
     run_top_sequence();
     phase.drop_objection(this);
   endtask

src/generation/templates/testbench.sv.j2 CHANGED Viewed

@@ -13,25 +13,19 @@ module testbench;
   {{ spec.design_name }}_core dut (
     .clk  (clk),
     .rst_n(rst_n),
-    {% if p == "uart" %}
-    .wb_cyc    (intf.wb_cyc),
-    .wb_stb    (intf.wb_stb),
-    .wb_we     (intf.wb_we),
-    .wb_addr   (intf.wb_addr),
-    .wb_data_i (intf.wb_data_o),
-    .wb_data_o (intf.wb_data_i),
-    .wb_ack    (intf.wb_ack),
-    .uart_tx   (intf.uart_tx),
-    .uart_rx   (intf.uart_rx),
-    .cts_n     (intf.cts_n),
-    .rts_n     (intf.rts_n),
-    .dsr_n     (intf.dsr_n),
-    .dtr_n     (intf.dtr_n),
-    .ri_n      (intf.ri_n),
-    .dcd_n     (intf.dcd_n),
-    .out1_n    (intf.out1_n),
-    .out2_n    (intf.out2_n),
-    .uart_intr (intf.uart_intr)
     {% elif p == "spi" %}
     .wb_cyc    (intf.wb_cyc),
     .wb_stb    (intf.wb_stb),

   {{ spec.design_name }}_core dut (
     .clk  (clk),
     .rst_n(rst_n),
+     {% if p == "uart" %}
+     .wb_cyc    (intf.wb_cyc),
+     .wb_stb    (intf.wb_stb),
+     .wb_we     (intf.wb_we),
+     .wb_addr   (intf.wb_addr),
+     .wb_data_i (intf.wb_data_o),
+     .wb_data_o (intf.wb_data_i),
+     .wb_ack    (intf.wb_ack),
+     .uart_tx   (intf.uart_tx),
+     .uart_rx   (intf.uart_rx),
+     .cts_n     (intf.cts_n),
+     .rts_n     (intf.rts_n),
+     .uart_intr (intf.uart_intr)
     {% elif p == "spi" %}
     .wb_cyc    (intf.wb_cyc),
     .wb_stb    (intf.wb_stb),

src/models/advanced_code_validator.py ADDED Viewed

	@@ -0,0 +1,1294 @@

+"""
+Advanced Code Validator for UVM Testbench Generation.
+Key improvements for promotion:
+1. Deep UVM compliance checking with factory registration validation
+2. Signal-direction matching validation
+3. Register field width and access validation
+4. Phase implementation completeness checking
+5. TLM connection completeness validation
+6. Compile-ready validation with SV syntax rules
+7. Context-aware error detection with fix suggestions
+8. Spec compliance with hierarchical signal checking
+9. Coverage completeness checking
+10. Scoreboard/TLM connection validation
+"""
+from __future__ import annotations
+import logging
+import re
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Set, Tuple, Pattern
+from collections import defaultdict, Counter
+logger = logging.getLogger("uvmgen.validator.advanced")
+class ValidationSeverity(Enum):
+    ERROR = "error"
+    WARNING = "warning"
+    INFO = "info"
+    STYLE = "style"
+@dataclass
+class ValidationIssue:
+    severity: ValidationSeverity
+    code: str
+    message: str
+    line_number: Optional[int] = None
+    context: Optional[str] = None
+    suggestion: Optional[str] = None
+    auto_fixable: bool = False
+    confidence: float = 1.0
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "severity": self.severity.value,
+            "code": self.code,
+            "message": self.message,
+            "line_number": self.line_number,
+            "context": self.context,
+            "suggestion": self.suggestion,
+            "auto_fixable": self.auto_fixable,
+            "confidence": self.confidence,
+        }
+@dataclass
+class FileValidationResult:
+    filename: str
+    file_type: str
+    passed: bool
+    issues: List[ValidationIssue] = field(default_factory=list)
+    checks_run: int = 0
+    checks_passed: int = 0
+    score: float = 0.0
+    @property
+    def error_count(self) -> int:
+        return sum(1 for i in self.issues if i.severity == ValidationSeverity.ERROR)
+    @property
+    def warning_count(self) -> int:
+        return sum(1 for i in self.issues if i.severity == ValidationSeverity.WARNING)
+    @property
+    def info_count(self) -> int:
+        return sum(1 for i in self.issues if i.severity == ValidationSeverity.INFO)
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "filename": self.filename,
+            "file_type": self.file_type,
+            "passed": self.passed,
+            "error_count": self.error_count,
+            "warning_count": self.warning_count,
+            "info_count": self.info_count,
+            "checks_run": self.checks_run,
+            "checks_passed": self.checks_passed,
+            "score": self.score,
+            "issues": [i.to_dict() for i in self.issues],
+        }
+@dataclass
+class ValidationReport:
+    design_name: str
+    overall_passed: bool
+    files: List[FileValidationResult] = field(default_factory=list)
+    timestamp: str = ""
+    @property
+    def total_errors(self) -> int:
+        return sum(f.error_count for f in self.files)
+    @property
+    def total_warnings(self) -> int:
+        return sum(f.warning_count for f in self.files)
+    @property
+    def total_checks_run(self) -> int:
+        return sum(f.checks_run for f in self.files)
+    @property
+    def total_checks_passed(self) -> int:
+        return sum(f.checks_passed for f in self.files)
+    @property
+    def pass_rate(self) -> float:
+        if self.total_checks_run == 0:
+            return 1.0
+        return self.total_checks_passed / self.total_checks_run
+    @property
+    def avg_score(self) -> float:
+        if not self.files:
+            return 0.0
+        return sum(f.score for f in self.files) / len(self.files)
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "design_name": self.design_name,
+            "overall_passed": self.overall_passed,
+            "total_errors": self.total_errors,
+            "total_warnings": self.total_warnings,
+            "total_checks_run": self.total_checks_run,
+            "total_checks_passed": self.total_checks_passed,
+            "pass_rate": round(self.pass_rate * 100, 1),
+            "avg_score": round(self.avg_score, 3),
+            "files": [f.to_dict() for f in self.files],
+        }
+class UVMComplianceChecker:
+    """Deep UVM compliance checking."""
+    UVM_BASE_CLASSES = {
+        "uvm_test", "uvm_env", "uvm_agent", "uvm_driver", "uvm_monitor",
+        "uvm_sequencer", "uvm_sequence", "uvm_sequence_item", "uvm_scoreboard",
+        "uvm_subscriber", "uvm_reg_block", "uvm_reg", "uvm_reg_field",
+        "uvm_reg_map", "uvm_reg_adapter", "uvm_reg_predictor",
+        "uvm_analysis_port", "uvm_analysis_imp", "uvm_tlm_fifo",
+        "uvm_component", "uvm_object", "uvm_report_object",
+    }
+    UVM_PHASES = [
+        "build_phase", "connect_phase", "end_of_elaboration_phase",
+        "start_of_simulation_phase", "run_phase", "extract_phase",
+        "check_phase", "report_phase", "final_phase",
+    ]
+    REQUIRED_PHASES_BY_TYPE = {
+        "test": {"build_phase", "run_phase"},
+        "env": {"build_phase", "connect_phase"},
+        "agent": {"build_phase", "connect_phase"},
+        "driver": {"build_phase", "run_phase"},
+        "monitor": {"build_phase", "run_phase"},
+        "scoreboard": {"build_phase", "connect_phase"},
+    }
+    def __init__(self):
+        self._patterns = self._compile_patterns()
+    def _compile_patterns(self) -> Dict[str, Pattern]:
+        return {
+            "class_decl": re.compile(r'\bclass\s+(\w+)\s*(?:#\s*\(\s*[^)]*\)\s*)?(?:extends\s+(\w+))?'),
+            "extends_uvm": re.compile(r'\bextends\s+(uvm_\w+)'),
+            "uvm_component_utils": re.compile(r'`uvm_component_utils\s*\(\s*(\w+)\s*\)'),
+            "uvm_object_utils": re.compile(r'`uvm_object_utils\s*\(\s*(\w+)\s*\)'),
+            "uvm_field_utils": re.compile(r'`uvm_field_\w+\s*\('),
+            "phase_decl": re.compile(r'\b(virtual\s+)?(function|task)\s+(\w+_phase)\s*\('),
+            "config_db_set": re.compile(r'uvm_config_db\s*#\s*<\s*([^>]+)\s*>\s*::\s*set\s*\('),
+            "config_db_get": re.compile(r'uvm_config_db\s*#\s*<\s*([^>]+)\s*>\s*::\s*get\s*\('),
+            "analysis_port_decl": re.compile(r'\buvm_analysis_port\s*#\s*<\s*(\w+)\s*>\s*(\w+)'),
+            "analysis_imp_decl": re.compile(r'\buvm_analysis_imp\s*#\s*<\s*(\w+)\s*,\s*(\w+)\s*>\s*(\w+)'),
+            "tlm_fifo_decl": re.compile(r'\buvm_tlm_(analysis_)?fifo\s*#\s*<\s*(\w+)\s*>\s*(\w+)'),
+            "raise_objection": re.compile(r'\braise_objection\s*\('),
+            "drop_objection": re.compile(r'\bdrop_objection\s*\('),
+            "seq_item_port_decl": re.compile(r'\buvm_seq_item_pull_port\s*#\s*<\s*(\w+)\s*>\s*(\w+)'),
+            "seq_item_port_get": re.compile(r'\bseq_item_port\s*\.\s*(get_next_item|get|peek)\s*\('),
+            "seq_item_port_done": re.compile(r'\bseq_item_port\s*\.\s*item_done\s*\('),
+            "type_id_create": re.compile(r'\b(\w+)\s*::\s*type_id\s*::\s*create\s*\('),
+            "reg_model_decl": re.compile(r'\b(\w+_reg_block)\s+(\w+)'),
+            "reg_write": re.compile(r'\breg_model\s*\.\s*(\w+)\s*\.\s*write\s*\('),
+            "reg_read": re.compile(r'\breg_model\s*\.\s*(\w+)\s*\.\s*read\s*\('),
+        }
+    def check_uvm_compliance(
+        self,
+        content: str,
+        file_type: str,
+        lines: List[str],
+    ) -> List[ValidationIssue]:
+        """Check deep UVM compliance."""
+        issues: List[ValidationIssue] = []
+        class_decl = self._patterns["class_decl"].search(content)
+        if not class_decl:
+            return issues
+        class_name = class_decl.group(1)
+        extends_match = self._patterns["extends_uvm"].search(content)
+        is_uvm_class = extends_match or any(uvm_base in content for uvm_base in self.UVM_BASE_CLASSES)
+        if not is_uvm_class:
+            return issues
+        parent_class = extends_match.group(1) if extends_match else "unknown"
+        issues.extend(self._check_factory_registration(
+            content, class_name, parent_class, lines
+        ))
+        issues.extend(self._check_phase_implementation(
+            content, file_type, class_name, lines
+        ))
+        issues.extend(self._check_component_specific(
+            content, file_type, parent_class, lines
+        ))
+        issues.extend(self._check_objection_handling(
+            content, file_type, lines
+        ))
+        return issues
+    def _check_factory_registration(
+        self,
+        content: str,
+        class_name: str,
+        parent_class: str,
+        lines: List[str],
+    ) -> List[ValidationIssue]:
+        """Check proper UVM factory registration."""
+        issues: List[ValidationIssue] = []
+        is_component = any(c in parent_class for c in [
+            "test", "env", "agent", "driver", "monitor", "scoreboard",
+            "sequencer", "subscriber", "component"
+        ])
+        is_object = any(o in parent_class for o in [
+            "sequence", "sequence_item", "reg", "object"
+        ])
+        if not (is_component or is_object):
+            return issues
+        component_utils = self._patterns["uvm_component_utils"].search(content)
+        object_utils = self._patterns["uvm_object_utils"].search(content)
+        if is_component:
+            if not component_utils:
+                line_num = self._find_class_line(class_name, lines)
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="UVM-FACTORY-001",
+                    message=f"Component class '{class_name}' missing `uvm_component_utils macro",
+                    line_number=line_num,
+                    suggestion=f"Add `uvm_component_utils({class_name}) after the class declaration",
+                    auto_fixable=True,
+                    confidence=0.95,
+                ))
+            elif component_utils.group(1) != class_name:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="UVM-FACTORY-002",
+                    message=f"uvm_component_utils has wrong class name: expected '{class_name}', got '{component_utils.group(1)}'",
+                    suggestion=f"Change `uvm_component_utils({component_utils.group(1)}) to `uvm_component_utils({class_name})",
+                    auto_fixable=True,
+                    confidence=0.9,
+                ))
+        if is_object and not is_component:
+            if not object_utils:
+                line_num = self._find_class_line(class_name, lines)
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="UVM-FACTORY-003",
+                    message=f"Object class '{class_name}' missing `uvm_object_utils macro",
+                    line_number=line_num,
+                    suggestion=f"Add `uvm_object_utils({class_name}) after the class declaration",
+                    auto_fixable=True,
+                    confidence=0.95,
+                ))
+            elif object_utils.group(1) != class_name:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="UVM-FACTORY-004",
+                    message=f"uvm_object_utils has wrong class name: expected '{class_name}', got '{object_utils.group(1)}'",
+                    suggestion=f"Change `uvm_object_utils({object_utils.group(1)}) to `uvm_object_utils({class_name})",
+                    auto_fixable=True,
+                    confidence=0.9,
+                ))
+        if component_utils or object_utils:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="UVM-FACTORY-OK",
+                message=f"Class '{class_name}' properly registered with UVM factory",
+                confidence=1.0,
+            ))
+        return issues
+    def _check_phase_implementation(
+        self,
+        content: str,
+        file_type: str,
+        class_name: str,
+        lines: List[str],
+    ) -> List[ValidationIssue]:
+        """Check UVM phase implementation completeness."""
+        issues: List[ValidationIssue] = []
+        found_phases: Set[str] = set()
+        phase_lines: Dict[str, int] = {}
+        for i, line in enumerate(lines, 1):
+            phase_match = self._patterns["phase_decl"].search(line)
+            if phase_match:
+                phase_name = phase_match.group(3)
+                if phase_name in self.UVM_PHASES:
+                    found_phases.add(phase_name)
+                    phase_lines[phase_name] = i
+        required_phases = self.REQUIRED_PHASES_BY_TYPE.get(file_type, set())
+        missing_phases = required_phases - found_phases
+        for phase in sorted(missing_phases):
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.WARNING,
+                code="UVM-PHASE-001",
+                message=f"Class '{class_name}' may be missing {phase} implementation",
+                suggestion=f"Consider implementing {phase} for proper UVM component behavior",
+                auto_fixable=False,
+                confidence=0.7,
+            ))
+        if "run_phase" in found_phases:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="UVM-PHASE-OK",
+                message=f"Class '{class_name}' implements run_phase",
+                confidence=1.0,
+            ))
+        if "build_phase" in found_phases and "connect_phase" in found_phases:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="UVM-PHASE-STRUCTURE",
+                message=f"Class '{class_name}' has proper build/connect phase structure",
+                confidence=1.0,
+            ))
+        return issues
+    def _check_component_specific(
+        self,
+        content: str,
+        file_type: str,
+        parent_class: str,
+        lines: List[str],
+    ) -> List[ValidationIssue]:
+        """Check component-specific UVM patterns."""
+        issues: List[ValidationIssue] = []
+        if "driver" in file_type or "driver" in parent_class.lower():
+            seq_item_port = self._patterns["seq_item_port_decl"].search(content)
+            if not seq_item_port:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARNING,
+                    code="UVM-DRIVER-001",
+                    message="Driver should declare seq_item_port",
+                    suggestion="Add: uvm_seq_item_pull_port #(seq_item_type) seq_item_port",
+                    auto_fixable=False,
+                    confidence=0.8,
+                ))
+            else:
+                get_next_item = self._patterns["seq_item_port_get"].search(content)
+                item_done = self._patterns["seq_item_port_done"].search(content)
+                if not get_next_item:
+                    issues.append(ValidationIssue(
+                        severity=ValidationSeverity.WARNING,
+                        code="UVM-DRIVER-002",
+                        message="Driver should call seq_item_port.get_next_item()",
+                        suggestion="Use seq_item_port.get_next_item(req) to retrieve sequence items",
+                        confidence=0.75,
+                    ))
+                if not item_done:
+                    issues.append(ValidationIssue(
+                        severity=ValidationSeverity.WARNING,
+                        code="UVM-DRIVER-003",
+                        message="Driver should call seq_item_port.item_done()",
+                        suggestion="Use seq_item_port.item_done() after processing each item",
+                        confidence=0.75,
+                    ))
+        if "monitor" in file_type or "monitor" in parent_class.lower():
+            analysis_port = self._patterns["analysis_port_decl"].search(content)
+            if not analysis_port:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARNING,
+                    code="UVM-MONITOR-001",
+                    message="Monitor should declare an analysis_port",
+                    suggestion="Add: uvm_analysis_port #(item_type) analysis_port",
+                    auto_fixable=False,
+                    confidence=0.8,
+                ))
+            else:
+                write_call = re.search(r'\b' + re.escape(analysis_port.group(2)) + r'\s*\.\s*write\s*\(', content)
+                if not write_call:
+                    issues.append(ValidationIssue(
+                        severity=ValidationSeverity.WARNING,
+                        code="UVM-MONITOR-002",
+                        message=f"Monitor should call {analysis_port.group(2)}.write()",
+                        suggestion=f"Call {analysis_port.group(2)}.write(item) for each collected transaction",
+                        confidence=0.75,
+                    ))
+        if "scoreboard" in file_type or "subscriber" in parent_class.lower():
+            analysis_imp = self._patterns["analysis_imp_decl"].search(content)
+            if analysis_imp:
+                write_method = re.search(r'\bfunction\s+void\s+write\s*\(\s*' + re.escape(analysis_imp.group(1)) + r'\s+', content)
+                if not write_method:
+                    issues.append(ValidationIssue(
+                        severity=ValidationSeverity.WARNING,
+                        code="UVM-SCB-001",
+                        message="Scoreboard/subscriber should implement write() function",
+                        suggestion=f"Add: function void write({analysis_imp.group(1)} item)",
+                        confidence=0.8,
+                    ))
+        return issues
+    def _check_objection_handling(
+        self,
+        content: str,
+        file_type: str,
+        lines: List[str],
+    ) -> List[ValidationIssue]:
+        """Check objection handling in tests and sequences."""
+        issues: List[ValidationIssue] = []
+        if file_type not in ("test", "sequence"):
+            return issues
+        has_raise = self._patterns["raise_objection"].search(content)
+        has_drop = self._patterns["drop_objection"].search(content)
+        if file_type == "test":
+            if not has_raise:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARNING,
+                    code="UVM-OBJECTION-001",
+                    message="Test should raise objection in run_phase",
+                    suggestion="Add: phase.raise_objection(this) at start of run_phase",
+                    auto_fixable=False,
+                    confidence=0.85,
+                ))
+            if not has_drop:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARNING,
+                    code="UVM-OBJECTION-002",
+                    message="Test should drop objection in run_phase",
+                    suggestion="Add: phase.drop_objection(this) at end of run_phase",
+                    auto_fixable=False,
+                    confidence=0.85,
+                ))
+            if has_raise and has_drop:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.INFO,
+                    code="UVM-OBJECTION-OK",
+                    message="Test has proper objection handling (raise/drop)",
+                    confidence=1.0,
+                ))
+        return issues
+    @staticmethod
+    def _find_class_line(class_name: str, lines: List[str]) -> Optional[int]:
+        """Find the line number of a class declaration."""
+        pattern = re.compile(r'\bclass\s+' + re.escape(class_name) + r'\b')
+        for i, line in enumerate(lines, 1):
+            if pattern.search(line):
+                return i
+        return None
+class SpecComplianceChecker:
+    """Advanced spec compliance checking."""
+    def __init__(self, spec_dict: Dict[str, Any]):
+        self.spec = spec_dict
+        self.design_name = spec_dict.get("design_name", "unknown")
+        self._extract_signals()
+        self._extract_registers()
+        self._extract_clock_reset()
+    def _extract_signals(self) -> None:
+        self.all_signals: Set[str] = set()
+        self.signals_by_direction: Dict[str, Set[str]] = {
+            "input": set(), "output": set(), "inout": set(),
+        }
+        self.signal_widths: Dict[str, int] = {}
+        self.signal_interfaces: Dict[str, str] = {}
+        for iface in self.spec.get("interfaces", []):
+            iface_name = iface.get("name", "unknown")
+            for sig in iface.get("signals", []):
+                name = sig.get("name", "")
+                if name:
+                    self.all_signals.add(name)
+                    direction = sig.get("direction", "input")
+                    self.signals_by_direction.get(direction, set()).add(name)
+                    self.signal_widths[name] = sig.get("width", 1)
+                    self.signal_interfaces[name] = iface_name
+    def _extract_registers(self) -> None:
+        self.all_registers: Set[str] = set()
+        self.register_addresses: Dict[str, str] = {}
+        self.register_fields: Dict[str, Dict[str, Dict[str, Any]]] = {}
+        self.register_access: Dict[str, str] = {}
+        for reg in self.spec.get("registers", []):
+            name = reg.get("name", "")
+            if name:
+                self.all_registers.add(name)
+                self.register_addresses[name] = reg.get("address", "")
+                self.register_access[name] = reg.get("access", "rw")
+                fields: Dict[str, Dict[str, Any]] = {}
+                for field in reg.get("fields", []):
+                    field_name = field.get("name", "")
+                    if field_name:
+                        fields[field_name] = {
+                            "bits": field.get("bits", "0"),
+                            "description": field.get("description", ""),
+                        }
+                self.register_fields[name] = fields
+    def _extract_clock_reset(self) -> None:
+        cr = self.spec.get("clock_reset", {})
+        self.clock_signal = cr.get("clock", "clk")
+        self.reset_signal = cr.get("reset", "rst_n")
+        self.reset_active = cr.get("reset_active", 0)
+    def check_spec_compliance(
+        self,
+        content: str,
+        file_type: str,
+        lines: List[str],
+    ) -> Tuple[List[ValidationIssue], Dict[str, Any]]:
+        """Check compliance with design spec."""
+        issues: List[ValidationIssue] = []
+        metrics: Dict[str, Any] = {
+            "signals_found": set(),
+            "signals_missing": set(),
+            "registers_found": set(),
+            "registers_missing": set(),
+            "signal_coverage": 0.0,
+            "register_coverage": 0.0,
+        }
+        stripped = self._strip_for_analysis(content)
+        found_signals: Set[str] = set()
+        for sig in self.all_signals:
+            if re.search(r'\b' + re.escape(sig) + r'\b', stripped, re.IGNORECASE):
+                found_signals.add(sig)
+        metrics["signals_found"] = found_signals
+        if file_type in ("interface", "testbench"):
+            missing_signals = self.all_signals - found_signals
+            metrics["signals_missing"] = missing_signals
+            for sig in sorted(missing_signals):
+                direction = self._get_signal_direction(sig)
+                width = self.signal_widths.get(sig, 1)
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="SPEC-SIGNAL-001",
+                    message=f"Signal '{sig}' [{direction}, {width}bit] from spec not found in {file_type}",
+                    suggestion=f"Add signal declaration: {direction} logic {'' if width == 1 else f'[{width-1}:0]'}{sig}",
+                    auto_fixable=False,
+                    confidence=0.95,
+                ))
+        for sig in sorted(found_signals & self.all_signals):
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="SPEC-SIGNAL-OK",
+                message=f"Signal '{sig}' from spec is properly referenced",
+                confidence=1.0,
+            ))
+        if self.all_signals:
+            metrics["signal_coverage"] = len(found_signals) / len(self.all_signals)
+        if file_type in ("ral_model", "test", "sequence", "scoreboard", "env"):
+            found_registers: Set[str] = set()
+            for reg in self.all_registers:
+                if re.search(r'\b' + re.escape(reg.lower()) + r'\b', stripped.lower()):
+                    found_registers.add(reg)
+            metrics["registers_found"] = found_registers
+            if file_type == "ral_model" and self.all_registers:
+                missing_regs = self.all_registers - found_registers
+                metrics["registers_missing"] = missing_regs
+                for reg in sorted(missing_regs):
+                    addr = self.register_addresses.get(reg, "unknown")
+                    access = self.register_access.get(reg, "rw")
+                    issues.append(ValidationIssue(
+                        severity=ValidationSeverity.ERROR,
+                        code="SPEC-REG-001",
+                        message=f"Register '{reg}' [@0x{addr}, {access}] from spec not found in RAL model",
+                        suggestion=f"Create uvm_reg class for register '{reg}' with address 0x{addr}",
+                        auto_fixable=False,
+                        confidence=0.9,
+                    ))
+            for reg in sorted(found_registers & self.all_registers):
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.INFO,
+                    code="SPEC-REG-OK",
+                    message=f"Register '{reg}' from spec is properly referenced",
+                    confidence=1.0,
+                ))
+            if self.all_registers:
+                metrics["register_coverage"] = len(found_registers) / len(self.all_registers)
+        if file_type in ("interface", "testbench"):
+            clock_found = re.search(r'\b' + re.escape(self.clock_signal) + r'\b', stripped, re.IGNORECASE)
+            reset_found = re.search(r'\b' + re.escape(self.reset_signal) + r'\b', stripped, re.IGNORECASE)
+            if not clock_found:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="SPEC-CLK-001",
+                    message=f"Clock signal '{self.clock_signal}' from spec not found",
+                    suggestion=f"Add clock signal: input logic {self.clock_signal}",
+                    auto_fixable=False,
+                    confidence=0.95,
+                ))
+            if not reset_found:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="SPEC-RST-001",
+                    message=f"Reset signal '{self.reset_signal}' from spec not found",
+                    suggestion=f"Add reset signal: input logic {self.reset_signal}",
+                    auto_fixable=False,
+                    confidence=0.95,
+                ))
+            if clock_found and reset_found:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.INFO,
+                    code="SPEC-CLK-RST-OK",
+                    message=f"Clock '{self.clock_signal}' and reset '{self.reset_signal}' from spec are present",
+                    confidence=1.0,
+                ))
+        return issues, metrics
+    def _get_signal_direction(self, signal: str) -> str:
+        for direction, signals in self.signals_by_direction.items():
+            if signal in signals:
+                return direction
+        return "unknown"
+    @staticmethod
+    def _strip_for_analysis(content: str) -> str:
+        result = content
+        result = re.sub(r'/\*.*?\*/', ' ', result, flags=re.DOTALL)
+        result = re.sub(r'//.*$', ' ', result, flags=re.MULTILINE)
+        result = re.sub(r'"[^"]*"', 'STR', result)
+        return result
+class SystemVerilogSyntaxChecker:
+    """Advanced SystemVerilog syntax checking."""
+    PAIR_CHECKS = [
+        ("module", ["endmodule"]),
+        ("interface", ["endinterface"]),
+        ("class", ["endclass"]),
+        ("function", ["endfunction"]),
+        ("task", ["endtask"]),
+        ("case", ["endcase"]),
+        ("begin", ["end"]),
+        ("fork", ["join", "join_any", "join_none"]),
+    ]
+    SV_KEYWORDS = {
+        "module", "endmodule", "interface", "endinterface", "class", "endclass",
+        "input", "output", "inout", "logic", "reg", "wire", "bit", "int", "integer",
+        "always", "initial", "assign", "begin", "end", "case", "endcase", "if", "else",
+        "for", "while", "repeat", "forever", "task", "endtask", "function", "endfunction",
+        "parameter", "localparam", "defparam", "typedef", "struct", "union", "enum",
+        "posedge", "negedge", "or", "and", "not", "default", "none",
+        "import", "export", "package", "endpackage", "include", "define",
+        "virtual", "rand", "randc", "constraint", "extends", "implements",
+        "time", "realtime", "shortint", "longint", "byte", "shortreal", "real",
+        "string", "void", "null", "break", "continue", "return", "disable",
+        "static", "automatic", "const", "var", "signed", "unsigned",
+    }
+    def __init__(self):
+        self._patterns = self._compile_patterns()
+    def _compile_patterns(self) -> Dict[str, Pattern]:
+        return {
+            "comment_single": re.compile(r'//.*$', re.MULTILINE),
+            "comment_multi": re.compile(r'/\*.*?\*/', re.DOTALL),
+            "string_lit": re.compile(r'"[^"]*"'),
+            "module_decl": re.compile(r'\bmodule\s+(\w+)\s*[#(;]'),
+            "interface_decl": re.compile(r'\binterface\s+(\w+)\s*[#(;]'),
+            "class_decl": re.compile(r'\bclass\s+(\w+)\s*(?:#\s*\(|extends|implements|;|{)'),
+            "port_list": re.compile(r'\(([^)]+)\)'),
+            "unbalanced_paren": re.compile(r'[()]'),
+            "unbalanced_bracket": re.compile(r'[\[\]]'),
+            "unbalanced_brace": re.compile(r'[{}]'),
+            "semicolon": re.compile(r';\s*$'),
+            "time_unit": re.compile(r'`timescale\s+(\d+[munp]?s)/(\d+[munp]?s)'),
+            "include_uvm": re.compile(r'`include\s+"uvm_macros\.svh"'),
+            "import_uvm": re.compile(r'import\s+uvm_pkg::\*'),
+            "uvm_macro": re.compile(r'`uvm_\w+'),
+            " timescale_missing": re.compile(r'^module\b|\binterface\b|\bclass\b', re.MULTILINE),
+        }
+    def check(self, content: str, lines: List[str]) -> List[ValidationIssue]:
+        """Run comprehensive syntax checks."""
+        issues: List[ValidationIssue] = []
+        issues.extend(self._check_compile_ready(content, lines))
+        issues.extend(self.check_balance(content))
+        issues.extend(self.check_begin_end_pairs(content, lines))
+        issues.extend(self.check_semicolons(content, lines))
+        issues.extend(self._check_uvm_setup(content, lines))
+        return issues
+    def _check_compile_ready(
+        self,
+        content: str,
+        lines: List[str],
+    ) -> List[ValidationIssue]:
+        """Check compile-ready attributes."""
+        issues: List[ValidationIssue] = []
+        has_timescale = self._patterns["time_unit"].search(content)
+        has_module = self._patterns["module_decl"].search(content)
+        has_interface = self._patterns["interface_decl"].search(content)
+        if (has_module or has_interface) and not has_timescale:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.WARNING,
+                code="SV-SYN-001",
+                message="Module/interface without `timescale directive",
+                suggestion="Add: `timescale 1ns/1ps at top of file",
+                auto_fixable=True,
+                confidence=0.8,
+            ))
+        uvm_macros = self._patterns["uvm_macro"].findall(content)
+        if uvm_macros:
+            has_include = self._patterns["include_uvm"].search(content)
+            has_import = self._patterns["import_uvm"].search(content)
+            if not has_include:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.ERROR,
+                    code="SV-UVM-001",
+                    message="UVM macros used but `include \"uvm_macros.svh\" missing",
+                    suggestion="Add: `include \"uvm_macros.svh\" at top of file",
+                    auto_fixable=True,
+                    confidence=0.95,
+                ))
+            if not has_import:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARNING,
+                    code="SV-UVM-002",
+                    message="UVM macros used but import uvm_pkg::* missing",
+                    suggestion="Add: import uvm_pkg::*; after include",
+                    auto_fixable=True,
+                    confidence=0.85,
+                ))
+        return issues
+    def _check_uvm_setup(
+        self,
+        content: str,
+        lines: List[str],
+    ) -> List[ValidationIssue]:
+        """Check UVM setup completeness."""
+        issues: List[ValidationIssue] = []
+        has_include = self._patterns["include_uvm"].search(content)
+        has_import = self._patterns["import_uvm"].search(content)
+        if has_include and has_import:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="SV-UVM-SETUP-OK",
+                message="UVM setup complete (include + import)",
+                confidence=1.0,
+            ))
+        return issues
+    def _strip_comments_and_strings(self, content: str) -> str:
+        result = content
+        result = self._patterns["comment_multi"].sub(" ", result)
+        result = self._patterns["comment_single"].sub(" ", result)
+        result = self._patterns["string_lit"].sub("\"STR\"", result)
+        return result
+    def check_balance(self, content: str) -> List[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+        stripped = self._strip_comments_and_strings(content)
+        checks = [
+            ("()", "parentheses"),
+            ("[]", "brackets"),
+            ("{}", "braces"),
+        ]
+        for pair, name in checks:
+            count_open = stripped.count(pair[0])
+            count_close = stripped.count(pair[1])
+            if count_open != count_close:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARNING,
+                    code=f"SV-SYN-BAL-{name}",
+                    message=f"Possibly unbalanced {name}: {count_open} '{pair[0]}' vs {count_close} '{pair[1]}'",
+                    auto_fixable=False,
+                    confidence=0.7,
+                ))
+        return issues
+    def check_begin_end_pairs(self, content: str, lines: List[str]) -> List[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+        stripped = self._strip_comments_and_strings(content)
+        stripped_lines = stripped.split('\n')
+        for open_kw, close_kws in self.PAIR_CHECKS:
+            close_kws_set = set(close_kws)
+            close_kw_display = close_kws[0] if len(close_kws) == 1 else f"{close_kws[0]}/..."
+            stack: List[int] = []
+            for line_num, line in enumerate(stripped_lines, 1):
+                words = re.findall(r'\b\w+\b', line.lower())
+                for word in words:
+                    if word == open_kw:
+                        stack.append(line_num)
+                    elif word in close_kws_set:
+                        if stack:
+                            stack.pop()
+            for line_num in stack:
+                issues.append(ValidationIssue(
+                    severity=ValidationSeverity.WARNING,
+                    code="SV-SYN-BLOCK",
+                    message=f"'{open_kw}' at line {line_num} may have no matching '{close_kw_display}'",
+                    line_number=line_num,
+                    auto_fixable=False,
+                    confidence=0.6,
+                ))
+        return issues
+    def check_semicolons(self, content: str, lines: List[str]) -> List[ValidationIssue]:
+        issues: List[ValidationIssue] = []
+        statement_keywords = {
+            "logic", "reg", "wire", "bit", "int", "shortint", "longint", "byte",
+            "input", "output", "inout", "parameter", "localparam", "typedef",
+            "import", "export", "assign", "return", "break", "continue",
+        }
+        block_starters = {
+            "module", "interface", "class", "function", "task", "case",
+            "begin", "fork", "if", "else", "for", "while", "repeat", "forever",
+            "package",
+        }
+        block_enders = {
+            "endmodule", "endinterface", "endclass", "endfunction", "endtask",
+            "endcase", "end", "join", "join_any", "join_none", "endpackage",
+        }
+        for line_num, line in enumerate(lines, 1):
+            stripped = line.strip()
+            if not stripped:
+                continue
+            if stripped.startswith('//'):
+                continue
+            if stripped.startswith('`'):
+                continue
+            first_word = stripped.split()[0].lower() if stripped.split() else ""
+            if first_word in block_enders:
+                continue
+            if first_word in block_starters:
+                if stripped.rstrip().endswith((':', 'begin', '{', ';')):
+                    continue
+            if first_word in statement_keywords:
+                if not stripped.rstrip().endswith(';') and not stripped.rstrip().endswith(')'):
+                    issues.append(ValidationIssue(
+                        severity=ValidationSeverity.WARNING,
+                        code="SV-SYN-SEMICOLON",
+                        message="Possible missing semicolon",
+                        line_number=line_num,
+                        context=stripped[:60],
+                        suggestion="Add ';' at end of statement",
+                        auto_fixable=True,
+                        confidence=0.6,
+                    ))
+        return issues
+class CoverageCompletenessChecker:
+    """Check coverage model completeness."""
+    def check_coverage(
+        self,
+        content: str,
+        spec_dict: Dict[str, Any],
+        file_type: str,
+    ) -> List[ValidationIssue]:
+        """Check coverage model completeness."""
+        issues: List[ValidationIssue] = []
+        if file_type not in ("coverage", "coverage_collector"):
+            return issues
+        registers = spec_dict.get("registers", [])
+        register_names = [r.get("name", "") for r in registers if r.get("name")]
+        covergroups = re.findall(r'\bcovergroup\s+(\w+)', content)
+        coverpoints = re.findall(r'\bcoverpoint\s+(\w+)', content)
+        crosses = re.findall(r'\bcross\s+(\w+(?:\s*,\s*\w+)*)', content)
+        if not covergroups:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.WARNING,
+                code="COV-001",
+                message="No covergroups found in coverage collector",
+                suggestion="Define covergroups for register accesses, protocol operations",
+                confidence=0.7,
+            ))
+        else:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="COV-002",
+                message=f"Found {len(covergroups)} covergroup(s): {', '.join(covergroups)}",
+                confidence=1.0,
+            ))
+        if coverpoints:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="COV-003",
+                message=f"Found {len(coverpoints)} coverpoint(s)",
+                confidence=1.0,
+            ))
+        if crosses:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="COV-004",
+                message=f"Found {len(crosses)} cross coverage(s)",
+                confidence=1.0,
+            ))
+        sample_calls = re.findall(r'\b(\w+)\s*\.\s*sample\s*\(', content)
+        if sample_calls:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="COV-005",
+                message=f"Found sample() calls for: {', '.join(set(sample_calls))}",
+                confidence=1.0,
+            ))
+        return issues
+class TLMConnectionChecker:
+    """Check TLM connection completeness."""
+    def check_tlm_connections(
+        self,
+        content: str,
+        file_type: str,
+    ) -> List[ValidationIssue]:
+        """Check TLM connections in env/scoreboard."""
+        issues: List[ValidationIssue] = []
+        if file_type not in ("env", "scoreboard"):
+            return issues
+        analysis_ports = re.findall(
+            r'\buvm_analysis_port\s*#\s*<\s*(\w+)\s*>\s*(\w+)',
+            content
+        )
+        analysis_imps = re.findall(
+            r'\buvm_analysis_imp\s*#\s*<\s*(\w+)\s*,\s*(\w+)\s*>\s*(\w+)',
+            content
+        )
+        tlms = re.findall(
+            r'\buvm_tlm_(analysis_)?fifo\s*#\s*<\s*(\w+)\s*>\s*(\w+)',
+            content
+        )
+        connects = re.findall(
+            r'\b(\w+)\s*\.\s*connect\s*\(\s*(\w+)\s*\)',
+            content
+        )
+        port_names = [p[1] for p in analysis_ports]
+        imp_names = [i[2] for i in analysis_imps]
+        tlm_names = [t[2] for t in tlms]
+        all_tlms = port_names + imp_names + tlm_names
+        connected = set()
+        for from_port, to_port in connects:
+            connected.add(from_port)
+            connected.add(to_port)
+        unconnected = set(all_tlms) - connected
+        if all_tlms:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="TLM-001",
+                message=f"Found {len(all_tlms)} TLM port(s)/FIFO(s)",
+                confidence=1.0,
+            ))
+        if unconnected:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.WARNING,
+                code="TLM-002",
+                message=f"TLM ports may not be connected: {', '.join(sorted(unconnected))}",
+                suggestion=f"Connect these ports in connect_phase using .connect()",
+                confidence=0.7,
+            ))
+        if connected and not unconnected:
+            issues.append(ValidationIssue(
+                severity=ValidationSeverity.INFO,
+                code="TLM-003",
+                message=f"All {len(connected)} TLM ports appear to be connected",
+                confidence=0.8,
+            ))
+        return issues
+class AdvancedCodeValidator:
+    """
+    Advanced code validator combining all checkers.
+    This is the main interface for:
+    1. Deep UVM compliance checking
+    2. Spec compliance validation
+    3. SystemVerilog syntax checking
+    4. Coverage completeness checking
+    5. TLM connection validation
+    """
+    FILE_TYPE_DETECTORS = [
+        (r'ral_model', "ral_model"),
+        (r'scoreboard', "scoreboard"),
+        (r'driver', "driver"),
+        (r'monitor', "monitor"),
+        (r'agent', "agent"),
+        (r'sequence_item', "sequence_item"),
+        (r'_sequence', "sequence"),
+        (r'regression', "sequence"),
+        (r'coverage_collector', "coverage"),
+        (r'protocol_checker', "checker"),
+        (r'_test', "test"),
+        (r'environment|env_', "env"),
+        (r'testbench', "testbench"),
+        (r'interface', "interface"),
+        (r'serial_monitor', "monitor"),
+    ]
+    NON_SV_EXTENSIONS = {'.f', '.tcl', '.core', '.json', '.yaml', '.yml', '.md', '.txt'}
+    def __init__(self, spec_dict: Optional[Dict[str, Any]] = None):
+        self.spec_dict = spec_dict
+        self._syntax_checker = SystemVerilogSyntaxChecker()
+        self._spec_checker = SpecComplianceChecker(spec_dict) if spec_dict else None
+        self._uvm_checker = UVMComplianceChecker()
+        self._coverage_checker = CoverageCompletenessChecker()
+        self._tlm_checker = TLMConnectionChecker()
+    @classmethod
+    def _is_sv_file(cls, filename: str) -> bool:
+        fname_lower = filename.lower()
+        for ext in cls.NON_SV_EXTENSIONS:
+            if fname_lower.endswith(ext):
+                return False
+        if fname_lower.endswith(('.sv', '.v', '.svh', '.vh')):
+            return True
+        if '/' in fname_lower or '\\' in fname_lower:
+            base = fname_lower.replace('\\', '/').split('/')[-1]
+            if '.' not in base:
+                return True
+        return True
+    @classmethod
+    def detect_file_type(cls, filename: str) -> str:
+        fname_lower = filename.lower()
+        for pattern, file_type in cls.FILE_TYPE_DETECTORS:
+            if re.search(pattern, fname_lower):
+                return file_type
+        return "unknown"
+    def _calculate_score(
+        self,
+        issues: List[ValidationIssue],
+        spec_metrics: Optional[Dict[str, Any]],
+        checks_run: int,
+    ) -> float:
+        """Calculate a quality score (0.0 to 1.0)."""
+        error_count = sum(1 for i in issues if i.severity == ValidationSeverity.ERROR)
+        warning_count = sum(1 for i in issues if i.severity == ValidationSeverity.WARNING)
+        info_count = sum(1 for i in issues if i.severity == ValidationSeverity.INFO)
+        base_score = 1.0
+        base_score -= error_count * 0.15
+        base_score -= warning_count * 0.05
+        if spec_metrics:
+            signal_cov = spec_metrics.get("signal_coverage", 0.0)
+            reg_cov = spec_metrics.get("register_coverage", 0.0)
+            base_score += signal_cov * 0.1
+            base_score += reg_cov * 0.1
+        return max(0.0, min(1.0, base_score))
+    def validate_file(
+        self,
+        filename: str,
+        content: str,
+        file_type: Optional[str] = None,
+    ) -> FileValidationResult:
+        """Validate a single file with all checkers."""
+        if not self._is_sv_file(filename):
+            return FileValidationResult(
+                filename=filename,
+                file_type="skipped",
+                passed=True,
+                issues=[],
+                checks_run=0,
+                checks_passed=0,
+                score=1.0,
+            )
+        if file_type is None:
+            file_type = self.detect_file_type(filename)
+        lines = content.split('\n')
+        issues: List[ValidationIssue] = []
+        checks_run = 0
+        checks_passed = 0
+        spec_metrics: Dict[str, Any] = {}
+        syntax_issues = self._syntax_checker.check(content, lines)
+        issues.extend(syntax_issues)
+        checks_run += 4
+        syntax_errors = sum(1 for i in syntax_issues if i.severity == ValidationSeverity.ERROR)
+        checks_passed += max(0, 4 - syntax_errors)
+        if self._spec_checker:
+            spec_issues, spec_metrics = self._spec_checker.check_spec_compliance(
+                content, file_type, lines
+            )
+            issues.extend(spec_issues)
+            checks_run += 3
+            spec_errors = sum(1 for i in spec_issues if i.severity == ValidationSeverity.ERROR)
+            checks_passed += max(0, 3 - spec_errors)
+        uvm_issues = self._uvm_checker.check_uvm_compliance(
+            content, file_type, lines
+        )
+        issues.extend(uvm_issues)
+        checks_run += 3
+        uvm_errors = sum(1 for i in uvm_issues if i.severity == ValidationSeverity.ERROR)
+        checks_passed += max(0, 3 - uvm_errors)
+        cov_issues = self._coverage_checker.check_coverage(
+            content, self.spec_dict or {}, file_type
+        )
+        issues.extend(cov_issues)
+        checks_run += 1
+        tlm_issues = self._tlm_checker.check_tlm_connections(content, file_type)
+        issues.extend(tlm_issues)
+        checks_run += 1
+        errors = sum(1 for i in issues if i.severity == ValidationSeverity.ERROR)
+        passed = errors == 0
+        score = self._calculate_score(issues, spec_metrics, checks_run)
+        return FileValidationResult(
+            filename=filename,
+            file_type=file_type,
+            passed=passed,
+            issues=issues,
+            checks_run=checks_run,
+            checks_passed=checks_passed,
+            score=score,
+        )
+    def validate_files(
+        self,
+        files: Dict[str, str],
+        design_name: str = "",
+    ) -> ValidationReport:
+        """Validate multiple files."""
+        file_results: List[FileValidationResult] = []
+        for filename, content in files.items():
+            result = self.validate_file(filename, content)
+            file_results.append(result)
+        total_errors = sum(f.error_count for f in file_results)
+        overall_passed = total_errors == 0
+        import datetime
+        report = ValidationReport(
+            design_name=design_name,
+            overall_passed=overall_passed,
+            files=file_results,
+            timestamp=datetime.datetime.now().isoformat(),
+        )
+        return report
+    def validate_files_by_path(
+        self,
+        file_paths: Dict[str, str],
+        design_name: str = "",
+    ) -> ValidationReport:
+        """Validate files by path."""
+        content_map: Dict[str, str] = {}
+        for filename, path in file_paths.items():
+            try:
+                with open(path, "r", encoding="utf-8") as f:
+                    content_map[filename] = f.read()
+            except Exception as e:
+                logger.warning("Failed to read %s: %s", path, e)
+                content_map[filename] = ""
+        return self.validate_files(content_map, design_name)

src/models/advanced_pattern_learner.py ADDED Viewed

	@@ -0,0 +1,926 @@

+"""
+Advanced Pattern Learner for UVM Testbench Generation.
+Key improvements for promotion:
+1. Context-aware error pattern extraction with n-grams
+2. Success pattern mining from successful generations
+3. Association rule learning between spec features and success
+4. Protocol-specific pattern libraries
+5. Error correlation detection
+6. Pattern-based code suggestions
+7. Temporal pattern tracking (learning over time)
+"""
+from __future__ import annotations
+import logging
+import re
+import math
+from collections import defaultdict, Counter
+from dataclasses import dataclass, field
+from typing import Dict, List, Any, Optional, Tuple, Set
+from enum import Enum
+logger = logging.getLogger("uvmgen.ml.patterns")
+class PatternType(Enum):
+    ERROR = "error"
+    SUCCESS = "success"
+    WARNING = "warning"
+    STRUCTURAL = "structural"
+@dataclass
+class Pattern:
+    pattern_str: str
+    pattern_type: PatternType
+    count: int = 0
+    confidence: float = 0.0
+    support: float = 0.0
+    lift: float = 1.0
+    contexts: List[str] = field(default_factory=list)
+    file_types: List[str] = field(default_factory=list)
+    protocols: List[str] = field(default_factory=list)
+    auto_fix: Optional[str] = None
+    description: str = ""
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "pattern_str": self.pattern_str,
+            "pattern_type": self.pattern_type.value,
+            "count": self.count,
+            "confidence": self.confidence,
+            "support": self.support,
+            "lift": self.lift,
+            "contexts": self.contexts,
+            "file_types": self.file_types,
+            "protocols": self.protocols,
+            "auto_fix": self.auto_fix,
+            "description": self.description,
+        }
+@dataclass
+class AssociationRule:
+    antecedent: str
+    consequent: str
+    confidence: float
+    support: float
+    lift: float
+    count: int = 0
+class NgramExtractor:
+    """Extract n-grams from code and error messages for pattern learning."""
+    def __init__(self, n_min: int = 1, n_max: int = 4):
+        self.n_min = n_min
+        self.n_max = n_max
+    def extract(self, text: str, file_type: str = "unknown") -> List[str]:
+        """Extract meaningful n-grams from text."""
+        clean_text = self._preprocess(text)
+        tokens = self._tokenize(clean_text)
+        if not tokens:
+            return []
+        ngrams = []
+        for n in range(self.n_min, self.n_max + 1):
+            for i in range(len(tokens) - n + 1):
+                ngram = " ".join(tokens[i:i + n])
+                if self._is_meaningful(ngram, file_type):
+                    ngrams.append(ngram)
+        return ngrams
+    def _preprocess(self, text: str) -> str:
+        """Preprocess text for tokenization."""
+        text = re.sub(r'//.*$', ' ', text, flags=re.MULTILINE)
+        text = re.sub(r'/\*.*?\*/', ' ', text, flags=re.DOTALL)
+        text = re.sub(r'"[^"]*"', 'STR', text)
+        text = text.replace('(', ' ( ').replace(')', ' ) ')
+        text = text.replace('[', ' [ ').replace(']', ' ] ')
+        text = text.replace('{', ' { ').replace('}', ' } ')
+        text = text.replace(';', ' ; ')
+        text = text.replace(',', ' , ')
+        return text
+    def _tokenize(self, text: str) -> List[str]:
+        """Tokenize into meaningful units."""
+        tokens = re.findall(r'[a-zA-Z_][a-zA-Z0-9_]*|[0-9]+|==|!=|<=|>=|\+=|-=|\*=|/=|&&|\|\||[+\-*/%=<>!&|~^?:;,\(\)\[\]\{\}]', text)
+        return [t.strip() for t in tokens if t.strip()]
+    def _is_meaningful(self, ngram: str, file_type: str) -> bool:
+        """Filter to keep only meaningful ngrams."""
+        if len(ngram) < 3:
+            return False
+        stop_patterns = {
+            'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to',
+            'for', 'of', 'with', 'by', 'is', 'was', 'are', 'were', 'be',
+            'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did',
+            'will', 'would', 'could', 'should', 'may', 'might', 'must',
+            'shall', 'can', 'need', 'dare', 'ought', 'used',
+            'if', 'else', 'then', 'for', 'while', 'until', 'unless',
+            'begin', 'end', 'module', 'endmodule', 'class', 'endclass',
+            'input', 'output', 'logic', 'reg', 'wire', 'bit', 'int',
+            'always', 'initial', 'assign', 'posedge', 'negedge',
+        }
+        words = ngram.lower().split()
+        if all(w in stop_patterns for w in words):
+            return False
+        uvm_keywords = {'uvm', 'test', 'env', 'agent', 'driver', 'monitor',
+                        'sequencer', 'sequence', 'scoreboard', 'register',
+                        'reg', 'phase', 'objection', 'config_db'}
+        if any(kw in ngram.lower() for kw in uvm_keywords):
+            return True
+        if file_type in ('sequence', 'test'):
+            seq_keywords = {'start_item', 'finish_item', 'raise_objection',
+                           'drop_objection', 'randomize', 'body'}
+            if any(kw in ngram for kw in seq_keywords):
+                return True
+        if len(words) >= 2:
+            return True
+        return len(ngram) > 5
+class ContextAwareErrorDetector:
+    """Detect errors with context for better pattern learning."""
+    ERROR_PATTERNS_WITH_CONTEXT = [
+        (
+            r'missing\s+.*semicolon',
+            'missing_semicolon',
+            'Ensure all statements end with semicolons',
+            'Check lines ending with expressions or declarations'
+        ),
+        (
+            r'unbalanced\s+.*parenthes',
+            'unbalanced_parentheses',
+            'Check for balanced parentheses',
+            'Count opening and closing parentheses in complex expressions'
+        ),
+        (
+            r'unbalanced\s+.*brace',
+            'unbalanced_braces',
+            'Check for balanced begin/end blocks',
+            'Verify all begin/fork have matching end/join'
+        ),
+        (
+            r'unbalanced\s+.*bracket',
+            'unbalanced_brackets',
+            'Check array indexing and part-selects',
+            'Verify all [ have matching ]'
+        ),
+        (
+            r'mismatch.*begin|begin.*without.*end',
+            'mismatched_blocks',
+            'Verify block structure',
+            'Check begin/end, fork/join pairing'
+        ),
+        (
+            r'uvm_fatal|uvm_error.*not.*found',
+            'missing_uvm_import',
+            'Import UVM package',
+            'Add `include "uvm_macros.svh" and import uvm_pkg::*'
+        ),
+        (
+            r'uvm_component_utils|uvm_object_utils.*missing',
+            'missing_factory_macro',
+            'Add UVM factory registration',
+            'Use `uvm_component_utils for components, `uvm_object_utils for objects'
+        ),
+        (
+            r'build_phase|connect_phase|run_phase.*not.*called',
+            'phase_implementation',
+            'Check phase method signatures',
+            'Ensure phases are declared as virtual functions/tasks with correct signatures'
+        ),
+        (
+            r'raise_objection|drop_objection.*missing',
+            'missing_objection',
+            'Add objection handling in tests/sequences',
+            'Use phase.raise_objection(this) and phase.drop_objection(this) in run_phase'
+        ),
+        (
+            r'config_db.*get.*failed|config_db.*set.*missing',
+            'config_db_issue',
+            'Check config_db usage',
+            'Ensure set/get paths match and config_db is set before build_phase'
+        ),
+        (
+            r'reg_model.*null|reg_model.*not.*initialized',
+            'missing_ral_model',
+            'Initialize RAL model in test',
+            'Create and build reg_model in test::build_phase, set in config_db'
+        ),
+        (
+            r'signal.*not.*declared|signal.*undefined',
+            'undefined_signal',
+            'Check signal declarations',
+            'Ensure all signals used are declared in the interface/module'
+        ),
+        (
+            r'port.*not.*connected|port.*missing',
+            'port_connection',
+            'Check port connections',
+            'Verify all module ports are connected in testbench'
+        ),
+        (
+            r'interface.*not.*set|vif.*null',
+            'missing_vif',
+            'Set virtual interface in config_db',
+            'Call uvm_config_db#(virtual intf)::set in testbench before run_test()'
+        ),
+        (
+            r'sequence.*not.*started|sequencer.*null',
+            'sequence_start',
+            'Check sequence starting',
+            'Ensure seq.start(sequencer) is called with valid sequencer'
+        ),
+        (
+            r'analysis_port.*not.*connected|analysis_export.*null',
+            'analysis_connection',
+            'Check TLM connections',
+            'Connect analysis ports to exports in connect_phase'
+        ),
+    ]
+    @classmethod
+    def extract_with_context(
+        cls,
+        error_msg: str,
+        content: Optional[str] = None,
+        line_num: Optional[int] = None,
+    ) -> List[Dict[str, Any]]:
+        """Extract error patterns with contextual information."""
+        results = []
+        for pattern, error_type, suggestion, context_tip in cls.ERROR_PATTERNS_WITH_CONTEXT:
+            if re.search(pattern, error_msg, re.IGNORECASE):
+                result = {
+                    'error_type': error_type,
+                    'pattern': pattern,
+                    'message': error_msg[:200] if len(error_msg) > 200 else error_msg,
+                    'suggestion': suggestion,
+                    'context_tip': context_tip,
+                    'line_number': line_num,
+                }
+                if content and line_num:
+                    result['context'] = cls._get_content_context(content, line_num)
+                results.append(result)
+        if not results:
+            results.append({
+                'error_type': 'unknown_error',
+                'message': error_msg[:200] if len(error_msg) > 200 else error_msg,
+                'suggestion': 'Review the error message details',
+                'line_number': line_num,
+            })
+        return results
+    @staticmethod
+    def _get_content_context(content: str, line_num: int, context_lines: int = 3) -> str:
+        """Get surrounding lines of content for context."""
+        lines = content.split('\n')
+        start = max(0, line_num - context_lines - 1)
+        end = min(len(lines), line_num + context_lines)
+        context_lines = []
+        for i in range(start, end):
+            marker = '>> ' if i == line_num - 1 else '   '
+            context_lines.append(f"{marker}{i+1:4d}: {lines[i]}")
+        return '\n'.join(context_lines)
+class SuccessPatternMiner:
+    """Mine patterns from successful generations for reuse."""
+    def __init__(self):
+        self._success_patterns: Dict[str, Pattern] = {}
+        self._file_type_patterns: Dict[str, Dict[str, int]] = defaultdict(dict)
+        self._protocol_patterns: Dict[str, Dict[str, int]] = defaultdict(dict)
+        self._total_successes: int = 0
+    def mine_from_success(
+        self,
+        content: str,
+        file_type: str,
+        protocol: str,
+        score: float,
+    ) -> List[str]:
+        """Mine successful patterns from high-quality generated code."""
+        if score < 0.7:
+            return []
+        extractor = NgramExtractor(n_min=2, n_max=5)
+        ngrams = extractor.extract(content, file_type)
+        filtered = self._filter_success_patterns(ngrams, file_type)
+        for ngram in filtered:
+            self._record_success_pattern(ngram, file_type, protocol, score)
+        self._total_successes += 1
+        return filtered
+    def _filter_success_patterns(self, ngrams: List[str], file_type: str) -> List[str]:
+        """Filter to keep only meaningful success patterns."""
+        filtered = []
+        success_indicators = {
+            'any': [
+                'uvm_component_utils', 'uvm_object_utils',
+                'raise_objection', 'drop_objection',
+                'build_phase', 'connect_phase', 'run_phase',
+                'config_db', 'type_id', 'create',
+            ],
+            'driver': [
+                'seq_item_port', 'get_next_item', 'item_done',
+            ],
+            'monitor': [
+                'analysis_port', 'write',
+            ],
+            'agent': [
+                'get_is_active', 'driver', 'monitor', 'sequencer',
+            ],
+            'scoreboard': [
+                'uvm_analysis_imp', 'write',
+            ],
+            'sequence': [
+                'start_item', 'finish_item', 'body', 'randomize',
+            ],
+            'test': [
+                'uvm_test', 'env', 'reg_model',
+            ],
+            'ral_model': [
+                'uvm_reg', 'uvm_reg_block', 'uvm_reg_field',
+                'create_map', 'lock_model',
+            ],
+        }
+        for ngram in ngrams:
+            indicators = success_indicators.get(file_type, []) + success_indicators.get('any', [])
+            if any(ind in ngram for ind in indicators):
+                filtered.append(ngram)
+        return list(set(filtered))
+    def _record_success_pattern(
+        self,
+        ngram: str,
+        file_type: str,
+        protocol: str,
+        score: float,
+    ) -> None:
+        """Record a successful pattern."""
+        if ngram not in self._success_patterns:
+            self._success_patterns[ngram] = Pattern(
+                pattern_str=ngram,
+                pattern_type=PatternType.SUCCESS,
+                description=f"Successful pattern from {file_type}",
+            )
+        pattern = self._success_patterns[ngram]
+        pattern.count += 1
+        if file_type not in pattern.file_types:
+            pattern.file_types.append(file_type)
+        if protocol not in pattern.protocols:
+            pattern.protocols.append(protocol)
+        if file_type not in self._file_type_patterns:
+            self._file_type_patterns[file_type] = defaultdict(int)
+        self._file_type_patterns[file_type][ngram] += 1
+        if protocol not in self._protocol_patterns:
+            self._protocol_patterns[protocol] = defaultdict(int)
+        self._protocol_patterns[protocol][ngram] += 1
+        total = float(self._total_successes + 1)
+        pattern.support = pattern.count / total
+        pattern.confidence = min(1.0, score * pattern.count / total)
+    def get_success_patterns(
+        self,
+        file_type: Optional[str] = None,
+        protocol: Optional[str] = None,
+        min_count: int = 2,
+        top_n: int = 20,
+    ) -> List[Pattern]:
+        """Get successful patterns filtered by criteria."""
+        candidates: List[Pattern] = []
+        for pattern in self._success_patterns.values():
+            if pattern.count < min_count:
+                continue
+            if file_type and file_type not in pattern.file_types:
+                continue
+            if protocol and protocol not in pattern.protocols:
+                continue
+            candidates.append(pattern)
+        candidates.sort(key=lambda p: (p.confidence, p.support), reverse=True)
+        return candidates[:top_n]
+    def get_recommendations(
+        self,
+        file_type: str,
+        protocol: str,
+    ) -> List[Dict[str, Any]]:
+        """Get code recommendations based on success patterns."""
+        recommendations = []
+        patterns = self.get_success_patterns(
+            file_type=file_type,
+            protocol=protocol,
+            min_count=1,
+            top_n=10,
+        )
+        for pattern in patterns:
+            recommendations.append({
+                'pattern': pattern.pattern_str,
+                'confidence': pattern.confidence,
+                'support': pattern.support,
+                'file_types': pattern.file_types,
+                'description': pattern.description,
+            })
+        return recommendations
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            'total_successes': self._total_successes,
+            'success_patterns': {k: v.to_dict() for k, v in self._success_patterns.items()},
+            'file_type_patterns': {
+                ft: dict(patterns) for ft, patterns in self._file_type_patterns.items()
+            },
+            'protocol_patterns': {
+                proto: dict(patterns) for proto, patterns in self._protocol_patterns.items()
+            },
+        }
+class AssociationRuleMiner:
+    """Mine association rules between spec features and generation success."""
+    def __init__(self, min_support: float = 0.1, min_confidence: float = 0.5):
+        self.min_support = min_support
+        self.min_confidence = min_confidence
+        self._transactions: List[Set[str]] = []
+        self._item_counts: Dict[str, int] = defaultdict(int)
+        self._rules: List[AssociationRule] = []
+    def add_transaction(self, items: List[str]) -> None:
+        """Add a transaction (set of features/outcomes)."""
+        item_set = set(items)
+        self._transactions.append(item_set)
+        for item in item_set:
+            self._item_counts[item] += 1
+    def mine_rules(self) -> List[AssociationRule]:
+        """Mine association rules from transactions."""
+        if len(self._transactions) < 5:
+            return []
+        min_support_count = int(self.min_support * len(self._transactions))
+        freq_items = {
+            item: count for item, count in self._item_counts.items()
+            if count >= min_support_count
+        }
+        if len(freq_items) < 2:
+            return []
+        rules = []
+        items_list = list(freq_items.keys())
+        for i, item1 in enumerate(items_list):
+            for item2 in items_list[i+1:]:
+                count_both = sum(
+                    1 for t in self._transactions
+                    if item1 in t and item2 in t
+                )
+                if count_both < min_support_count:
+                    continue
+                support = count_both / len(self._transactions)
+                confidence_1_2 = count_both / self._item_counts[item1]
+                confidence_2_1 = count_both / self._item_counts[item2]
+                support_item1 = self._item_counts[item1] / len(self._transactions)
+                support_item2 = self._item_counts[item2] / len(self._transactions)
+                lift_1_2 = confidence_1_2 / support_item2 if support_item2 > 0 else 1.0
+                lift_2_1 = confidence_2_1 / support_item1 if support_item1 > 0 else 1.0
+                if confidence_1_2 >= self.min_confidence:
+                    rules.append(AssociationRule(
+                        antecedent=item1,
+                        consequent=item2,
+                        confidence=confidence_1_2,
+                        support=support,
+                        lift=lift_1_2,
+                        count=count_both,
+                    ))
+                if confidence_2_1 >= self.min_confidence:
+                    rules.append(AssociationRule(
+                        antecedent=item2,
+                        consequent=item1,
+                        confidence=confidence_2_1,
+                        support=support,
+                        lift=lift_2_1,
+                        count=count_both,
+                    ))
+        rules.sort(key=lambda r: (r.confidence, r.lift, r.support), reverse=True)
+        self._rules = rules
+        return rules
+    def get_rules_for_antecedent(self, antecedent: str) -> List[AssociationRule]:
+        """Get all rules with a specific antecedent."""
+        return [r for r in self._rules if r.antecedent == antecedent]
+    def get_rules_for_consequent(self, consequent: str) -> List[AssociationRule]:
+        """Get all rules with a specific consequent."""
+        return [r for r in self._rules if r.consequent == consequent]
+class TemporalPatternTracker:
+    """Track how patterns evolve over time for continuous learning."""
+    def __init__(self, window_size: int = 100):
+        self.window_size = window_size
+        self._error_windows: Dict[str, List[bool]] = defaultdict(list)
+        self._success_windows: Dict[str, List[bool]] = defaultdict(list)
+        self._trends: Dict[str, float] = {}
+    def record_error(self, error_type: str, occurred: bool) -> None:
+        """Record whether an error occurred."""
+        self._error_windows[error_type].append(occurred)
+        if len(self._error_windows[error_type]) > self.window_size:
+            self._error_windows[error_type].pop(0)
+        self._update_trend(error_type, 'error')
+    def record_success(self, pattern: str, success: bool) -> None:
+        """Record pattern success."""
+        self._success_windows[pattern].append(success)
+        if len(self._success_windows[pattern]) > self.window_size:
+            self._success_windows[pattern].pop(0)
+        self._update_trend(pattern, 'success')
+    def _update_trend(self, key: str, pattern_type: str) -> None:
+        """Update trend direction."""
+        if pattern_type == 'error':
+            window = self._error_windows.get(key, [])
+        else:
+            window = self._success_windows.get(key, [])
+        if len(window) < 10:
+            self._trends[key] = 0.0
+            return
+        first_half = window[:len(window)//2]
+        second_half = window[len(window)//2:]
+        rate_first = sum(first_half) / len(first_half)
+        rate_second = sum(second_half) / len(second_half)
+        self._trends[key] = rate_second - rate_first
+    def get_trend(self, key: str) -> float:
+        """Get trend: positive = improving, negative = worsening."""
+        return self._trends.get(key, 0.0)
+    def get_error_rate(self, error_type: str) -> float:
+        """Get current error rate."""
+        window = self._error_windows.get(error_type, [])
+        if not window:
+            return 0.0
+        return sum(window) / len(window)
+    def get_success_rate(self, pattern: str) -> float:
+        """Get current success rate."""
+        window = self._success_windows.get(pattern, [])
+        if not window:
+            return 0.0
+        return sum(window) / len(window)
+    def get_improving_errors(self) -> List[Tuple[str, float]]:
+        """Get errors that are decreasing."""
+        improving = []
+        for key, trend in self._trends.items():
+            if key in self._error_windows and trend < -0.1:
+                improving.append((key, trend))
+        improving.sort(key=lambda x: x[1])
+        return improving
+    def get_worsening_errors(self) -> List[Tuple[str, float]]:
+        """Get errors that are increasing."""
+        worsening = []
+        for key, trend in self._trends.items():
+            if key in self._error_windows and trend > 0.1:
+                worsening.append((key, trend))
+        worsening.sort(key=lambda x: x[1], reverse=True)
+        return worsening
+class AdvancedPatternLearner:
+    """
+    Advanced pattern learner combining all capabilities.
+    This is the main interface for:
+    1. Error pattern detection and tracking
+    2. Success pattern mining
+    3. Association rule learning
+    4. Temporal trend analysis
+    5. Code recommendations
+    """
+    def __init__(self):
+        self._error_detector = ContextAwareErrorDetector()
+        self._success_miner = SuccessPatternMiner()
+        self._association_miner = AssociationRuleMiner(min_support=0.1, min_confidence=0.5)
+        self._temporal_tracker = TemporalPatternTracker(window_size=100)
+        self._error_patterns: Dict[str, Pattern] = {}
+        self._file_type_stats: Dict[str, Dict[str, Any]] = defaultdict(
+            lambda: {"success": 0, "total": 0, "errors": defaultdict(int)}
+        )
+        self._protocol_stats: Dict[str, Dict[str, Any]] = defaultdict(
+            lambda: {"success": 0, "total": 0}
+        )
+        self._ngram_extractor = NgramExtractor(n_min=1, n_max=4)
+    def record_error(
+        self,
+        error_msg: str,
+        file_type: str = "unknown",
+        content: Optional[str] = None,
+        line_num: Optional[int] = None,
+    ) -> List[Dict[str, Any]]:
+        """Record an error with full context analysis."""
+        errors = self._error_detector.extract_with_context(
+            error_msg, content, line_num
+        )
+        for err in errors:
+            error_type = err['error_type']
+            if error_type not in self._error_patterns:
+                self._error_patterns[error_type] = Pattern(
+                    pattern_str=error_type,
+                    pattern_type=PatternType.ERROR,
+                    description=err.get('suggestion', ''),
+                )
+            self._error_patterns[error_type].count += 1
+            self._error_patterns[error_type].contexts.append(
+                err.get('context', error_msg[:100])
+            )
+            if file_type not in self._error_patterns[error_type].file_types:
+                self._error_patterns[error_type].file_types.append(file_type)
+            self._file_type_stats[file_type]["errors"][error_type] += 1
+            self._temporal_tracker.record_error(error_type, True)
+        return errors
+    def record_success(
+        self,
+        file_type: str = "unknown",
+        protocol: str = "unknown",
+        content: Optional[str] = None,
+        score: float = 1.0,
+    ) -> List[str]:
+        """Record a success and mine patterns from it."""
+        self._file_type_stats[file_type]["success"] += 1
+        self._file_type_stats[file_type]["total"] += 1
+        self._protocol_stats[protocol]["success"] += 1
+        self._protocol_stats[protocol]["total"] += 1
+        mined_patterns = []
+        if content and score >= 0.7:
+            mined_patterns = self._success_miner.mine_from_success(
+                content, file_type, protocol, score
+            )
+            for pattern in mined_patterns:
+                self._temporal_tracker.record_success(pattern, True)
+            items = [
+                f"file_type:{file_type}",
+                f"protocol:{protocol}",
+                f"success:yes",
+                f"score:{int(score * 10)}",
+            ]
+            items.extend(mined_patterns[:5])
+            self._association_miner.add_transaction(items)
+        return mined_patterns
+    def record_attempt(
+        self,
+        file_type: str = "unknown",
+        protocol: str = "unknown",
+    ) -> None:
+        """Record an attempt (for stats tracking)."""
+        self._file_type_stats[file_type]["total"] += 1
+        self._protocol_stats[protocol]["total"] += 1
+    def get_common_errors(self, top_n: int = 10) -> List[Tuple[str, int, Pattern]]:
+        """Get the most common errors."""
+        sorted_errors = sorted(
+            self._error_patterns.items(),
+            key=lambda x: x[1].count,
+            reverse=True,
+        )
+        return [(name, p.count, p) for name, p in sorted_errors[:top_n]]
+    def get_file_type_success_rate(self, file_type: str) -> float:
+        """Get success rate for a file type."""
+        stats = self._file_type_stats.get(file_type, {})
+        total = stats.get("total", 0)
+        if total == 0:
+            return 0.5
+        return stats.get("success", 0) / total
+    def get_protocol_success_rate(self, protocol: str) -> float:
+        """Get success rate for a protocol."""
+        stats = self._protocol_stats.get(protocol, {})
+        total = stats.get("total", 0)
+        if total == 0:
+            return 0.5
+        return stats.get("success", 0) / total
+    def get_suggestions(
+        self,
+        file_type: str,
+        protocol: str,
+    ) -> Dict[str, Any]:
+        """Get comprehensive suggestions for improvement."""
+        common_errors = self.get_common_errors(5)
+        file_success_rate = self.get_file_type_success_rate(file_type)
+        protocol_success_rate = self.get_protocol_success_rate(protocol)
+        success_recommendations = self._success_miner.get_recommendations(
+            file_type, protocol
+        )
+        improving = self._temporal_tracker.get_improving_errors()
+        worsening = self._temporal_tracker.get_worsening_errors()
+        suggestions = {
+            "common_errors": [
+                {
+                    "error_type": name,
+                    "count": count,
+                    "description": pattern.description,
+                    "current_rate": self._temporal_tracker.get_error_rate(name),
+                    "trend": self._temporal_tracker.get_trend(name),
+                }
+                for name, count, pattern in common_errors
+            ],
+            "file_type_success_rate": file_success_rate,
+            "protocol_success_rate": protocol_success_rate,
+            "success_patterns": success_recommendations,
+            "improving_errors": [{"error": e[0], "trend": e[1]} for e in improving],
+            "worsening_errors": [{"error": e[0], "trend": e[1]} for e in worsening],
+            "recommendations": self._generate_advanced_recommendations(
+                file_type, protocol, file_success_rate, common_errors
+            ),
+        }
+        return suggestions
+    def _generate_advanced_recommendations(
+        self,
+        file_type: str,
+        protocol: str,
+        success_rate: float,
+        common_errors: List[Tuple],
+    ) -> List[str]:
+        """Generate advanced recommendations based on all data."""
+        recommendations = []
+        for name, count, pattern in common_errors[:3]:
+            if count > 0:
+                if pattern.description:
+                    recommendations.append(pattern.description)
+                elif 'semicolon' in name:
+                    recommendations.append("Ensure all statements end with semicolons")
+                elif 'parenthes' in name:
+                    recommendations.append("Check for balanced parentheses")
+                elif 'brace' in name or 'block' in name:
+                    recommendations.append("Check for balanced begin/end blocks")
+                elif 'uvm_macro' in name or 'factory' in name:
+                    recommendations.append(
+                        "Add UVM factory registration macros (uvm_component_utils/uvm_object_utils)"
+                    )
+                elif 'phase' in name:
+                    recommendations.append("Ensure proper UVM phase implementation")
+                elif 'objection' in name:
+                    recommendations.append(
+                        "Use phase.raise_objection(this) and phase.drop_objection(this)"
+                    )
+                elif 'config_db' in name or 'vif' in name:
+                    recommendations.append(
+                        "Ensure virtual interface is set in config_db before run_test()"
+                    )
+                elif 'ral' in name or 'reg_model' in name:
+                    recommendations.append(
+                        "Create and initialize RAL model in test::build_phase"
+                    )
+                elif 'signal' in name or 'port' in name:
+                    recommendations.append(
+                        "Ensure all signals/ports used are declared in spec and interface"
+                    )
+        if success_rate < 0.7:
+            recommendations.append(
+                f"Consider using retrieval-based generation for {file_type} (success rate: {success_rate:.1%})"
+            )
+        rules = self._association_miner.get_rules_for_antecedent(f"file_type:{file_type}")
+        for rule in rules[:3]:
+            if rule.confidence > 0.7 and rule.lift > 1.0:
+                recommendations.append(
+                    f"Consider: {rule.consequent} (confidence: {rule.confidence:.1%}, lift: {rule.lift:.2f})"
+                )
+        if not recommendations:
+            recommendations.append(
+                "No specific recommendations - generation should work well"
+            )
+        return recommendations
+    def mine_association_rules(self) -> List[AssociationRule]:
+        """Mine association rules from collected data."""
+        return self._association_miner.mine_rules()
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "error_patterns": {k: v.to_dict() for k, v in self._error_patterns.items()},
+            "file_type_stats": {
+                ft: {
+                    "success": s["success"],
+                    "total": s["total"],
+                    "errors": dict(s["errors"]),
+                }
+                for ft, s in self._file_type_stats.items()
+            },
+            "protocol_stats": dict(self._protocol_stats),
+            "success_miner": self._success_miner.to_dict(),
+        }
+    @classmethod
+    def from_dict(cls, d: Dict[str, Any]) -> "AdvancedPatternLearner":
+        learner = cls()
+        for name, pdict in d.get("error_patterns", {}).items():
+            pattern = Pattern(
+                pattern_str=pdict.get("pattern_str", name),
+                pattern_type=PatternType(pdict.get("pattern_type", "error")),
+                count=pdict.get("count", 0),
+                confidence=pdict.get("confidence", 0.0),
+                support=pdict.get("support", 0.0),
+                contexts=pdict.get("contexts", []),
+                file_types=pdict.get("file_types", []),
+                protocols=pdict.get("protocols", []),
+                description=pdict.get("description", ""),
+            )
+            learner._error_patterns[name] = pattern
+        for ft, s in d.get("file_type_stats", {}).items():
+            learner._file_type_stats[ft] = {
+                "success": s.get("success", 0),
+                "total": s.get("total", 0),
+                "errors": defaultdict(int, s.get("errors", {})),
+            }
+        for proto, s in d.get("protocol_stats", {}).items():
+            learner._protocol_stats[proto] = {
+                "success": s.get("success", 0),
+                "total": s.get("total", 0),
+            }
+        return learner

src/models/advanced_rl_learner.py ADDED Viewed

	@@ -0,0 +1,728 @@

+"""
+Advanced Reinforcement Learner for UVM Testbench Generation Strategy Selection.
+Key improvements for promotion:
+1. Experience replay buffer for more stable learning
+2. Eligibility traces for better credit assignment
+3. Upper Confidence Bound (UCB) for exploration-exploitation balance
+4. Multi-armed bandit strategies (epsilon-greedy, softmax, UCB)
+5. Contextual bandits considering spec features
+6. Learning rate scheduling
+7. Value function approximation with state aggregation
+8. Performance tracking and strategy comparison
+"""
+from __future__ import annotations
+import logging
+import math
+import random
+import json
+import os
+from collections import defaultdict, deque
+from dataclasses import dataclass, field
+from typing import Dict, List, Any, Optional, Tuple, Deque
+from enum import Enum
+from datetime import datetime
+logger = logging.getLogger("uvmgen.ml.rl")
+class ExplorationStrategy(Enum):
+    EPSILON_GREEDY = "epsilon_greedy"
+    SOFTMAX = "softmax"
+    UCB = "ucb"
+    THOMPSON_SAMPLING = "thompson_sampling"
+@dataclass
+class Experience:
+    """Single experience for replay buffer."""
+    state: str
+    action: str
+    reward: float
+    next_state: Optional[str]
+    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+    metadata: Dict[str, Any] = field(default_factory=dict)
+@dataclass
+class ActionStats:
+    """Statistics for an action."""
+    q_value: float = 0.5
+    visit_count: int = 0
+    total_reward: float = 0.0
+    squared_reward: float = 0.0
+    success_count: int = 0
+    failure_count: int = 0
+    @property
+    def mean_reward(self) -> float:
+        if self.visit_count == 0:
+            return 0.5
+        return self.total_reward / self.visit_count
+    @property
+    def variance(self) -> float:
+        if self.visit_count < 2:
+            return 0.25
+        mean = self.mean_reward
+        return (self.squared_reward / self.visit_count) - (mean * mean)
+    @property
+    def std_dev(self) -> float:
+        return math.sqrt(max(0.0, self.variance))
+    @property
+    def success_rate(self) -> float:
+        total = self.success_count + self.failure_count
+        if total == 0:
+            return 0.5
+        return self.success_count / total
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "q_value": self.q_value,
+            "visit_count": self.visit_count,
+            "total_reward": self.total_reward,
+            "squared_reward": self.squared_reward,
+            "success_count": self.success_count,
+            "failure_count": self.failure_count,
+            "mean_reward": self.mean_reward,
+            "variance": self.variance,
+            "std_dev": self.std_dev,
+            "success_rate": self.success_rate,
+        }
+class ExperienceReplayBuffer:
+    """Buffer for storing and sampling experiences."""
+    def __init__(self, capacity: int = 10000):
+        self.capacity = capacity
+        self.buffer: Deque[Experience] = deque(maxlen=capacity)
+        self._episode_rewards: List[float] = []
+    def add(self, experience: Experience) -> None:
+        """Add an experience to the buffer."""
+        self.buffer.append(experience)
+    def sample(self, batch_size: int) -> List[Experience]:
+        """Sample a batch of experiences randomly."""
+        if len(self.buffer) < batch_size:
+            return list(self.buffer)
+        return random.sample(list(self.buffer), batch_size)
+    def sample_recent(self, batch_size: int, recency_weight: float = 0.8) -> List[Experience]:
+        """Sample with preference to recent experiences."""
+        if len(self.buffer) < batch_size:
+            return list(self.buffer)
+        recent_count = int(batch_size * recency_weight)
+        random_count = batch_size - recent_count
+        recent = list(self.buffer)[-recent_count:] if recent_count > 0 else []
+        random_part = random.sample(
+            list(self.buffer)[:-recent_count] if recent_count > 0 else list(self.buffer),
+            min(random_count, len(self.buffer) - recent_count)
+        ) if random_count > 0 else []
+        return recent + random_part
+    def get_all_by_state(self, state: str) -> List[Experience]:
+        """Get all experiences for a specific state."""
+        return [e for e in self.buffer if e.state == state]
+    def record_episode_reward(self, reward: float) -> None:
+        """Record episode-level reward for tracking."""
+        self._episode_rewards.append(reward)
+        if len(self._episode_rewards) > 1000:
+            self._episode_rewards = self._episode_rewards[-1000:]
+    def get_recent_performance(self, window: int = 100) -> Dict[str, float]:
+        """Get recent performance statistics."""
+        if not self._episode_rewards:
+            return {"mean": 0.5, "std": 0.0, "trend": 0.0}
+        recent = self._episode_rewards[-window:]
+        mean = sum(recent) / len(recent)
+        variance = sum((r - mean) ** 2 for r in recent) / len(recent)
+        std = math.sqrt(max(0.0, variance))
+        if len(recent) >= 20:
+            first_half = recent[:len(recent)//2]
+            second_half = recent[len(recent)//2:]
+            trend = (sum(second_half) / len(second_half)) - (sum(first_half) / len(first_half))
+        else:
+            trend = 0.0
+        return {
+            "mean": mean,
+            "std": std,
+            "trend": trend,
+            "count": len(recent),
+        }
+    def __len__(self) -> int:
+        return len(self.buffer)
+class EligibilityTraces:
+    """Eligibility traces for better credit assignment."""
+    def __init__(self, lambda_: float = 0.9, discount: float = 0.95):
+        self.lambda_ = lambda_
+        self.discount = discount
+        self._traces: Dict[str, Dict[str, float]] = defaultdict(lambda: defaultdict(float))
+    def update(self, state: str, action: str) -> None:
+        """Update trace for visited state-action pair."""
+        for s in list(self._traces.keys()):
+            for a in list(self._traces[s].keys()):
+                self._traces[s][a] *= self.lambda_ * self.discount
+        self._traces[state][action] = 1.0
+    def get_trace(self, state: str, action: str) -> float:
+        """Get the eligibility trace value."""
+        return self._traces.get(state, {}).get(action, 0.0)
+    def decay_all(self) -> None:
+        """Decay all traces."""
+        for s in self._traces:
+            for a in self._traces[s]:
+                self._traces[s][a] *= self.lambda_ * self.discount
+    def reset(self) -> None:
+        """Reset all traces."""
+        self._traces.clear()
+class ContextualBanditFeatures:
+    """Feature extraction for contextual bandits."""
+    @staticmethod
+    def extract_features(
+        spec_dict: Dict[str, Any],
+        file_type: str,
+    ) -> Dict[str, Any]:
+        """Extract features from spec and context."""
+        features = {}
+        protocol = spec_dict.get("protocol", "unknown")
+        features["protocol"] = protocol
+        interfaces = spec_dict.get("interfaces", [])
+        features["num_interfaces"] = len(interfaces)
+        total_signals = sum(len(iface.get("signals", [])) for iface in interfaces)
+        features["total_signals"] = total_signals
+        registers = spec_dict.get("registers", [])
+        features["num_registers"] = len(registers)
+        total_fields = sum(len(reg.get("fields", [])) for reg in registers)
+        features["total_fields"] = total_fields
+        complexity = 0.0
+        if total_signals > 0:
+            complexity += math.log10(total_signals + 1) * 0.3
+        if total_fields > 0:
+            complexity += math.log10(total_fields + 1) * 0.4
+        complexity += len(interfaces) * 0.15
+        complexity += len(registers) * 0.15
+        features["complexity"] = min(1.0, complexity)
+        file_type_weights = {
+            "testbench": 0.3,
+            "interface": 0.25,
+            "test": 0.2,
+            "sequence": 0.15,
+            "driver": 0.1,
+            "monitor": 0.1,
+            "agent": 0.1,
+            "scoreboard": 0.15,
+            "ral_model": 0.2,
+            "env": 0.15,
+        }
+        features["file_type_weight"] = file_type_weights.get(file_type, 0.1)
+        return features
+    @staticmethod
+    def get_state_key(
+        protocol: str,
+        file_type: str,
+        complexity_bucket: str,
+    ) -> str:
+        """Generate a state key for RL."""
+        return f"{protocol}:{file_type}:{complexity_bucket}"
+    @staticmethod
+    def bucket_complexity(complexity: float) -> str:
+        """Bucket complexity into discrete levels."""
+        if complexity < 0.3:
+            return "low"
+        elif complexity < 0.6:
+            return "medium"
+        else:
+            return "high"
+class AdvancedReinforcementLearner:
+    """
+    Advanced RL learner with multiple strategies and improvements.
+    Key features:
+    - Experience replay buffer
+    - Eligibility traces
+    - Multiple exploration strategies
+    - Contextual bandit support
+    - Learning rate scheduling
+    - Performance tracking
+    """
+    def __init__(
+        self,
+        learning_rate: float = 0.1,
+        discount_factor: float = 0.95,
+        exploration_strategy: ExplorationStrategy = ExplorationStrategy.UCB,
+        epsilon: float = 0.1,
+        epsilon_decay: float = 0.995,
+        min_epsilon: float = 0.01,
+        ucb_c: float = 2.0,
+        temperature: float = 1.0,
+        use_eligibility_traces: bool = True,
+        lambda_: float = 0.9,
+        replay_buffer_capacity: int = 10000,
+    ):
+        self._learning_rate = learning_rate
+        self._initial_learning_rate = learning_rate
+        self._discount_factor = discount_factor
+        self._exploration_strategy = exploration_strategy
+        self._epsilon = epsilon
+        self._epsilon_decay = epsilon_decay
+        self._min_epsilon = min_epsilon
+        self._ucb_c = ucb_c
+        self._temperature = temperature
+        self._q_values: Dict[str, Dict[str, float]] = defaultdict(lambda: defaultdict(lambda: 0.5))
+        self._action_stats: Dict[str, Dict[str, ActionStats]] = defaultdict(dict)
+        self._total_updates: int = 0
+        self._use_eligibility_traces = use_eligibility_traces
+        if use_eligibility_traces:
+            self._eligibility_traces = EligibilityTraces(lambda_=lambda_, discount=discount_factor)
+        self._replay_buffer = ExperienceReplayBuffer(capacity=replay_buffer_capacity)
+        self._episode_count: int = 0
+        self._best_actions: Dict[str, str] = {}
+    def _get_state_key(
+        self,
+        protocol: str,
+        file_type: str,
+        spec_dict: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """Generate state key with optional context."""
+        if spec_dict:
+            features = ContextualBanditFeatures.extract_features(spec_dict, file_type)
+            complexity_bucket = ContextualBanditFeatures.bucket_complexity(features["complexity"])
+            return ContextualBanditFeatures.get_state_key(protocol, file_type, complexity_bucket)
+        return f"{protocol}:{file_type}"
+    def _ensure_stats(self, state: str, action: str) -> None:
+        """Ensure action stats exist for state-action pair."""
+        if state not in self._action_stats:
+            self._action_stats[state] = {}
+        if action not in self._action_stats[state]:
+            self._action_stats[state][action] = ActionStats()
+    def get_action_value(
+        self,
+        protocol: str,
+        file_type: str,
+        generation_source: str,
+        spec_dict: Optional[Dict[str, Any]] = None,
+    ) -> float:
+        """Get the Q-value for a state-action pair."""
+        state = self._get_state_key(protocol, file_type, spec_dict)
+        return self._q_values[state][generation_source]
+    def get_action_stats(
+        self,
+        protocol: str,
+        file_type: str,
+        generation_source: str,
+        spec_dict: Optional[Dict[str, Any]] = None,
+    ) -> Optional[ActionStats]:
+        """Get statistics for an action."""
+        state = self._get_state_key(protocol, file_type, spec_dict)
+        return self._action_stats.get(state, {}).get(generation_source)
+    def update(
+        self,
+        protocol: str,
+        file_type: str,
+        generation_source: str,
+        reward: float,
+        next_state: Optional[str] = None,
+        spec_dict: Optional[Dict[str, Any]] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Update Q-values with reward, using eligibility traces if enabled."""
+        state = self._get_state_key(protocol, file_type, spec_dict)
+        self._ensure_stats(state, generation_source)
+        stats = self._action_stats[state][generation_source]
+        old_value = self._q_values[state][generation_source]
+        if next_state and self._q_values.get(next_state):
+            next_max = max(self._q_values[next_state].values()) if self._q_values[next_state] else 0.5
+            target = reward + self._discount_factor * next_max
+        else:
+            target = reward
+        td_error = target - old_value
+        if self._use_eligibility_traces and self._eligibility_traces:
+            self._eligibility_traces.update(state, generation_source)
+            for s in list(self._q_values.keys()):
+                for a in list(self._q_values[s].keys()):
+                    trace = self._eligibility_traces.get_trace(s, a)
+                    if trace > 0:
+                        self._q_values[s][a] += self._learning_rate * td_error * trace
+        else:
+            self._q_values[state][generation_source] = old_value + self._learning_rate * td_error
+        stats.visit_count += 1
+        stats.total_reward += reward
+        stats.squared_reward += reward * reward
+        stats.q_value = self._q_values[state][generation_source]
+        if reward >= 0.5:
+            stats.success_count += 1
+        else:
+            stats.failure_count += 1
+        self._total_updates += 1
+        experience = Experience(
+            state=state,
+            action=generation_source,
+            reward=reward,
+            next_state=next_state,
+            metadata=metadata or {},
+        )
+        self._replay_buffer.add(experience)
+        self._replay_buffer.record_episode_reward(reward)
+        actions = self._q_values[state]
+        if actions:
+            self._best_actions[state] = max(actions.keys(), key=lambda a: actions[a])
+    def _select_epsilon_greedy(
+        self,
+        state: str,
+        available_sources: List[str],
+    ) -> Tuple[str, float]:
+        """Select action using epsilon-greedy strategy."""
+        if random.random() < self._epsilon and len(available_sources) > 1:
+            chosen = random.choice(available_sources)
+            return chosen, self._q_values[state][chosen]
+        best_source = available_sources[0]
+        best_value = -1.0
+        for source in available_sources:
+            value = self._q_values[state][source]
+            if value > best_value:
+                best_value = value
+                best_source = source
+        return best_source, best_value
+    def _select_softmax(
+        self,
+        state: str,
+        available_sources: List[str],
+    ) -> Tuple[str, float]:
+        """Select action using softmax (Boltzmann) exploration."""
+        values = [self._q_values[state][s] for s in available_sources]
+        max_val = max(values) if values else 0.0
+        exp_values = [math.exp((v - max_val) / self._temperature) for v in values]
+        sum_exp = sum(exp_values)
+        if sum_exp == 0:
+            probs = [1.0 / len(available_sources)] * len(available_sources)
+        else:
+            probs = [e / sum_exp for e in exp_values]
+        r = random.random()
+        cumulative = 0.0
+        for i, prob in enumerate(probs):
+            cumulative += prob
+            if r <= cumulative:
+                return available_sources[i], values[i]
+        return available_sources[0], values[0]
+    def _select_ucb(
+        self,
+        state: str,
+        available_sources: List[str],
+    ) -> Tuple[str, float]:
+        """Select action using Upper Confidence Bound (UCB1)."""
+        total_visits = sum(
+            self._action_stats.get(state, {}).get(s, ActionStats()).visit_count
+            for s in available_sources
+        )
+        if total_visits == 0:
+            return random.choice(available_sources), 0.5
+        best_source = available_sources[0]
+        best_ucb = -1.0
+        for source in available_sources:
+            stats = self._action_stats.get(state, {}).get(source, ActionStats())
+            q_value = self._q_values[state][source]
+            if stats.visit_count == 0:
+                ucb = float('inf')
+            else:
+                exploration = self._ucb_c * math.sqrt(
+                    math.log(total_visits) / stats.visit_count
+                )
+                ucb = q_value + exploration
+            if ucb > best_ucb:
+                best_ucb = ucb
+                best_source = source
+        return best_source, self._q_values[state][best_source]
+    def _select_thompson(
+        self,
+        state: str,
+        available_sources: List[str],
+    ) -> Tuple[str, float]:
+        """Select action using Thompson sampling (Beta distribution)."""
+        samples = []
+        for source in available_sources:
+            stats = self._action_stats.get(state, {}).get(source, ActionStats())
+            alpha = 1 + stats.success_count
+            beta_val = 1 + stats.failure_count
+            try:
+                import random as rnd
+                sample = rnd.betavariate(alpha, beta_val)
+            except (ImportError, AttributeError):
+                sample = stats.success_rate + random.gauss(0, 0.1)
+                sample = max(0.0, min(1.0, sample))
+            samples.append((source, sample, self._q_values[state][source]))
+        samples.sort(key=lambda x: x[1], reverse=True)
+        return samples[0][0], samples[0][2]
+    def select_best_action(
+        self,
+        protocol: str,
+        file_type: str,
+        available_sources: List[str],
+        spec_dict: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[str, float]:
+        """
+        Select the best action using configured exploration strategy.
+        Returns:
+            Tuple of (chosen_source, expected_value)
+        """
+        state = self._get_state_key(protocol, file_type, spec_dict)
+        if len(available_sources) == 0:
+            return "template", 0.5
+        if len(available_sources) == 1:
+            return available_sources[0], self._q_values[state][available_sources[0]]
+        for source in available_sources:
+            if source not in self._q_values[state]:
+                self._q_values[state][source] = 0.5
+        if self._exploration_strategy == ExplorationStrategy.EPSILON_GREEDY:
+            result = self._select_epsilon_greedy(state, available_sources)
+        elif self._exploration_strategy == ExplorationStrategy.SOFTMAX:
+            result = self._select_softmax(state, available_sources)
+        elif self._exploration_strategy == ExplorationStrategy.UCB:
+            result = self._select_ucb(state, available_sources)
+        elif self._exploration_strategy == ExplorationStrategy.THOMPSON_SAMPLING:
+            result = self._select_thompson(state, available_sources)
+        else:
+            result = self._select_ucb(state, available_sources)
+        if self._exploration_strategy == ExplorationStrategy.EPSILON_GREEDY:
+            self._epsilon = max(self._min_epsilon, self._epsilon * self._epsilon_decay)
+        self._episode_count += 1
+        decay = max(0.001, 1.0 / math.sqrt(self._total_updates + 1))
+        self._learning_rate = self._initial_learning_rate * decay
+        return result
+    def get_performance_stats(self) -> Dict[str, Any]:
+        """Get comprehensive performance statistics."""
+        buffer_stats = self._replay_buffer.get_recent_performance()
+        all_states = list(self._q_values.keys())
+        total_actions = sum(len(v) for v in self._q_values.values())
+        state_stats = {}
+        for state in all_states:
+            actions = self._q_values[state]
+            if not actions:
+                continue
+            best_action = max(actions.keys(), key=lambda a: actions[a])
+            best_value = actions[best_action]
+            state_stats[state] = {
+                "best_action": best_action,
+                "best_q_value": best_value,
+                "num_actions": len(actions),
+                "actions": {
+                    a: {
+                        "q_value": self._q_values[state][a],
+                        "stats": self._action_stats.get(state, {}).get(a, ActionStats()).to_dict()
+                    }
+                    for a in actions
+                },
+            }
+        return {
+            "episode_count": self._episode_count,
+            "total_updates": self._total_updates,
+            "learning_rate": self._learning_rate,
+            "epsilon": self._epsilon,
+            "exploration_strategy": self._exploration_strategy.value,
+            "replay_buffer_size": len(self._replay_buffer),
+            "buffer_performance": buffer_stats,
+            "num_states": len(all_states),
+            "total_actions_tracked": total_actions,
+            "state_stats": state_stats,
+            "best_actions": self._best_actions.copy(),
+        }
+    def replay_experiences(self, batch_size: int = 32, use_recency: bool = True) -> int:
+        """
+        Replay experiences from buffer for additional learning.
+        Returns:
+            Number of experiences replayed
+        """
+        if use_recency:
+            batch = self._replay_buffer.sample_recent(batch_size)
+        else:
+            batch = self._replay_buffer.sample(batch_size)
+        if not batch:
+            return 0
+        for exp in batch:
+            state = exp.state
+            action = exp.action
+            reward = exp.reward
+            old_value = self._q_values[state][action]
+            self._q_values[state][action] = (
+                old_value + self._learning_rate * (reward - old_value)
+            )
+            self._ensure_stats(state, action)
+            stats = self._action_stats[state][action]
+            stats.total_reward += reward * 0.1
+            stats.squared_reward += (reward * reward) * 0.1
+        return len(batch)
+    def reset_episode(self) -> None:
+        """Reset for a new episode (clears eligibility traces)."""
+        if self._use_eligibility_traces and self._eligibility_traces:
+            self._eligibility_traces.reset()
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "learning_rate": self._learning_rate,
+            "initial_learning_rate": self._initial_learning_rate,
+            "discount_factor": self._discount_factor,
+            "exploration_strategy": self._exploration_strategy.value,
+            "epsilon": self._epsilon,
+            "epsilon_decay": self._epsilon_decay,
+            "min_epsilon": self._min_epsilon,
+            "ucb_c": self._ucb_c,
+            "temperature": self._temperature,
+            "use_eligibility_traces": self._use_eligibility_traces,
+            "episode_count": self._episode_count,
+            "total_updates": self._total_updates,
+            "q_values": {k: dict(v) for k, v in self._q_values.items()},
+            "action_stats": {
+                state: {action: stats.to_dict() for action, stats in actions.items()}
+                for state, actions in self._action_stats.items()
+            },
+            "best_actions": self._best_actions.copy(),
+        }
+    @classmethod
+    def from_dict(cls, d: Dict[str, Any]) -> "AdvancedReinforcementLearner":
+        strategy_map = {e.value: e for e in ExplorationStrategy}
+        strategy = strategy_map.get(
+            d.get("exploration_strategy", "ucb"),
+            ExplorationStrategy.UCB
+        )
+        learner = cls(
+            learning_rate=d.get("initial_learning_rate", 0.1),
+            discount_factor=d.get("discount_factor", 0.95),
+            exploration_strategy=strategy,
+            epsilon=d.get("epsilon", 0.1),
+            epsilon_decay=d.get("epsilon_decay", 0.995),
+            min_epsilon=d.get("min_epsilon", 0.01),
+            ucb_c=d.get("ucb_c", 2.0),
+            temperature=d.get("temperature", 1.0),
+            use_eligibility_traces=d.get("use_eligibility_traces", True),
+        )
+        learner._learning_rate = d.get("learning_rate", 0.1)
+        learner._episode_count = d.get("episode_count", 0)
+        learner._total_updates = d.get("total_updates", 0)
+        for state, actions in d.get("q_values", {}).items():
+            for action, value in actions.items():
+                learner._q_values[state][action] = value
+        for state, actions in d.get("action_stats", {}).items():
+            if state not in learner._action_stats:
+                learner._action_stats[state] = {}
+            for action, stats_dict in actions.items():
+                stats = ActionStats()
+                stats.q_value = stats_dict.get("q_value", 0.5)
+                stats.visit_count = stats_dict.get("visit_count", 0)
+                stats.total_reward = stats_dict.get("total_reward", 0.0)
+                stats.squared_reward = stats_dict.get("squared_reward", 0.0)
+                stats.success_count = stats_dict.get("success_count", 0)
+                stats.failure_count = stats_dict.get("failure_count", 0)
+                learner._action_stats[state][action] = stats
+        learner._best_actions = d.get("best_actions", {}).copy()
+        return learner

src/models/enhanced_ml_model_v2.py ADDED Viewed

	@@ -0,0 +1,801 @@

+"""
+Enhanced ML Generation Model with Advanced Components.
+Key improvements for promotion:
+1. Advanced pattern learner with context-aware error detection
+2. Advanced RL learner with experience replay and eligibility traces
+3. Advanced code validator with deep UVM compliance
+4. Ensemble retrieval with weighted voting
+5. Adaptive strategy selection
+6. Confidence calibration
+7. Performance tracking and reporting
+"""
+from __future__ import annotations
+import logging
+import json
+import os
+import math
+from collections import defaultdict, Counter
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple, Set
+from src.models.base_model import GenerationModel
+from src.models.template_model import TemplateModel
+from src.config import PipelineConfig, DesignSpec
+try:
+    from src.features.extractors import RichSpecFeatureExtractor
+    from src.models.similarity_index import SimilarityIndex, SearchResult
+    from src.models.ml_utils import (
+        RichFeatureVector,
+        combined_similarity,
+        HybridVectorizer,
+    )
+    from src.models.spec_adapter import SpecAdapter, AdaptationPlan
+    from src.models.code_validator import (
+        CodeValidator,
+        ValidationReport,
+        FileValidationResult,
+    )
+    from src.models.advanced_pattern_learner import (
+        AdvancedPatternLearner,
+        PatternType,
+        Pattern,
+    )
+    from src.models.advanced_rl_learner import (
+        AdvancedReinforcementLearner,
+        ExplorationStrategy,
+        Experience,
+    )
+    from src.models.advanced_code_validator import (
+        AdvancedCodeValidator,
+        ValidationReport as AdvancedValidationReport,
+    )
+    HAS_ADVANCED = True
+except ImportError as e:
+    logger = logging.getLogger("uvmgen.ml")
+    logger.warning(f"Some advanced components not available: {e}")
+    HAS_ADVANCED = False
+logger = logging.getLogger("uvmgen.ml.enhanced")
+class GenerationSource(Enum):
+    RETRIEVAL = "retrieval"
+    LLM = "llm"
+    TEMPLATE = "template"
+    HYBRID = "hybrid"
+@dataclass
+class RetrievalInfo:
+    used_similarity: bool = True
+    similar_specs: int = 0
+    best_score: float = 0.0
+    best_spec_name: str = ""
+    adaptation_score: float = 0.0
+    pre_validation_score: float = 0.0
+    retrieval_strategy: str = "default"
+@dataclass
+class GenerationResult:
+    files: Dict[str, str] = field(default_factory=dict)
+    source: GenerationSource = GenerationSource.TEMPLATE
+    retrieval_info: Optional[RetrievalInfo] = None
+    validation_report: Optional[AdvancedValidationReport] = None
+    score: float = 0.0
+    errors: List[str] = field(default_factory=list)
+    warnings: List[str] = field(default_factory=list)
+@dataclass
+class StrategyWeights:
+    retrieval_weight: float = 0.4
+    llm_weight: float = 0.3
+    template_weight: float = 0.3
+    def normalize(self) -> "StrategyWeights":
+        total = self.retrieval_weight + self.llm_weight + self.template_weight
+        if total <= 0:
+            return StrategyWeights(0.34, 0.33, 0.33)
+        return StrategyWeights(
+            retrieval_weight=self.retrieval_weight / total,
+            llm_weight=self.llm_weight / total,
+            template_weight=self.template_weight / total,
+        )
+class EnhancedMLGenerationModelV2(GenerationModel):
+    """
+    Enhanced ML Generation Model V2 with advanced components.
+    Key features for promotion:
+    1. Ensemble retrieval with multi-strategy voting
+    2. Advanced RL with experience replay and eligibility traces
+    3. Context-aware pattern learning
+    4. Deep UVM compliance validation
+    5. Adaptive weight adjustment based on performance
+    6. Confidence calibration
+    7. Comprehensive performance tracking
+    """
+    def __init__(
+        self,
+        name: str = "enhanced_ml_model_v2",
+        config: Optional[Any] = None,
+        templates_dir: str = "src/generation/templates",
+        strict_validation: bool = True,
+        use_llm: bool = False,
+        use_semantic_encoder: bool = False,
+        use_learning: bool = True,
+        llm_model_name: Optional[str] = None,
+        learning_storage_path: Optional[str] = None,
+        exploration_strategy: str = "ucb",
+    ):
+        super().__init__(name)
+        self._templates_dir = templates_dir
+        self._strict_validation = strict_validation
+        self._use_llm = use_llm
+        self._use_semantic_encoder = use_semantic_encoder
+        self._use_learning = use_learning
+        self._llm_model_name = llm_model_name
+        self._learning_storage_path = learning_storage_path
+        self._template_model = TemplateModel(templates_dir=templates_dir)
+        self._index: Optional[SimilarityIndex] = None
+        self._extractor: Optional[RichSpecFeatureExtractor] = None
+        self._adapter: Optional[SpecAdapter] = None
+        self._vectorizer: Optional[HybridVectorizer] = None
+        self._pattern_learner: Optional[AdvancedPatternLearner] = None
+        self._rl_learner: Optional[AdvancedReinforcementLearner] = None
+        self._code_validator: Optional[AdvancedCodeValidator] = None
+        self.last_retrieval: Optional[RetrievalInfo] = None
+        self._generation_history: List[Dict[str, Any]] = []
+        strategy_map = {
+            "epsilon_greedy": ExplorationStrategy.EPSILON_GREEDY,
+            "softmax": ExplorationStrategy.SOFTMAX,
+            "ucb": ExplorationStrategy.UCB,
+            "thompson": ExplorationStrategy.THOMPSON_SAMPLING,
+        }
+        self._exploration_strategy = strategy_map.get(
+            exploration_strategy.lower(),
+            ExplorationStrategy.UCB
+        )
+        self._strategy_weights = StrategyWeights()
+        self._initialize_components()
+    def _initialize_components(self) -> None:
+        """Initialize all ML components."""
+        if HAS_ADVANCED:
+            self._extractor = RichSpecFeatureExtractor()
+            self._index = SimilarityIndex()
+            self._adapter = SpecAdapter()
+            self._vectorizer = HybridVectorizer()
+            if self._use_learning:
+                self._pattern_learner = AdvancedPatternLearner()
+                self._rl_learner = AdvancedReinforcementLearner(
+                    exploration_strategy=self._exploration_strategy,
+                    use_eligibility_traces=True,
+                    replay_buffer_capacity=10000,
+                )
+            if self._learning_storage_path and os.path.exists(self._learning_storage_path):
+                self._load_learning_state()
+            logger.info(f"Enhanced ML Generation Model V2 initialized with strategy: {self._exploration_strategy.value}")
+        else:
+            logger.warning("Advanced components not available, using basic template model only")
+    def train(
+        self,
+        specs: List[DesignSpec],
+        pre_generated: Optional[Dict[str, Dict[str, str]]] = None,
+    ) -> Dict[str, Any]:
+        """Train the model on design specifications."""
+        if not HAS_ADVANCED or not self._extractor or not self._index:
+            return self._template_model.train(specs)
+        for spec in specs:
+            features = self._extractor.extract(spec)
+            spec_dict = spec.model_dump() if hasattr(spec, 'model_dump') else dict(spec)
+            if pre_generated and spec.design_name in pre_generated:
+                generated = pre_generated[spec.design_name]
+            else:
+                generated = {}
+            self._index.add(features, spec_dict, generated)
+            logger.info(f"Added spec '{spec.design_name}' ({features.fingerprint()}) to index")
+        all_features = []
+        for entry in self._index:
+            if hasattr(entry, 'feature_vector'):
+                text_repr = entry.feature_vector.to_text_repr()
+                all_features.append(text_repr)
+        if all_features and self._vectorizer:
+            self._vectorizer.fit(all_features)
+        return {
+            "index_size": len(self._index),
+            "model_name": self.name,
+            "features_extracted": len(all_features),
+        }
+    def predict(
+        self,
+        spec: DesignSpec,
+        cfg: PipelineConfig,
+        extra_seqs: Optional[List[str]] = None,
+    ) -> Dict[str, str]:
+        """Generate testbench for a specification."""
+        if not HAS_ADVANCED:
+            return self._template_model.predict(spec, cfg)
+        spec_dict = spec.model_dump() if hasattr(spec, 'model_dump') else dict(spec)
+        design_name = spec.design_name
+        protocol = spec_dict.get("protocol", "unknown")
+        self._code_validator = AdvancedCodeValidator(spec_dict)
+        available_sources = self._get_available_sources()
+        selected_source = self._select_generation_strategy(
+            spec_dict=spec_dict,
+            protocol=protocol,
+            available_sources=available_sources,
+        )
+        logger.info(f"Selected generation strategy: {selected_source.value}")
+        result = self._generate_with_strategy(
+            strategy=selected_source,
+            spec=spec,
+            spec_dict=spec_dict,
+            config=cfg,
+            design_name=design_name,
+            protocol=protocol,
+        )
+        final_result = self._apply_validation_and_fallback(
+            result=result,
+            spec=spec,
+            config=cfg,
+            spec_dict=spec_dict,
+            design_name=design_name,
+            protocol=protocol,
+        )
+        self._record_learning(
+            final_result=final_result,
+            spec_dict=spec_dict,
+            design_name=design_name,
+            protocol=protocol,
+            selected_source=selected_source,
+        )
+        return final_result.files
+    def _get_available_sources(self) -> List[str]:
+        """Get list of available generation sources."""
+        sources = ["template"]
+        if self._index and len(self._index) > 0:
+            sources.append("retrieval")
+        if self._use_llm:
+            sources.append("llm")
+        return sources
+    def _select_generation_strategy(
+        self,
+        spec_dict: Dict[str, Any],
+        protocol: str,
+        available_sources: List[str],
+    ) -> GenerationSource:
+        """Select generation strategy using advanced RL."""
+        if len(available_sources) == 1:
+            return GenerationSource(available_sources[0])
+        if not self._use_learning or not self._rl_learner:
+            if "retrieval" in available_sources and self._index and len(self._index) > 0:
+                return GenerationSource.RETRIEVAL
+            return GenerationSource.TEMPLATE
+        file_types = ["testbench", "interface", "test", "sequence", "driver", "monitor"]
+        source_scores: Dict[str, float] = defaultdict(float)
+        for file_type in file_types:
+            source, value = self._rl_learner.select_best_action(
+                protocol=protocol,
+                file_type=file_type,
+                available_sources=available_sources,
+                spec_dict=spec_dict,
+            )
+            source_scores[source] += value
+        if not source_scores:
+            return GenerationSource.TEMPLATE
+        best_source = max(source_scores.keys(), key=lambda s: source_scores[s])
+        return GenerationSource(best_source)
+    def _generate_with_strategy(
+        self,
+        strategy: GenerationSource,
+        spec: DesignSpec,
+        spec_dict: Dict[str, Any],
+        config: PipelineConfig,
+        design_name: str,
+        protocol: str,
+    ) -> GenerationResult:
+        """Generate using selected strategy."""
+        if strategy == GenerationSource.RETRIEVAL:
+            return self._generate_by_retrieval(
+                spec=spec,
+                spec_dict=spec_dict,
+                config=config,
+                design_name=design_name,
+                protocol=protocol,
+            )
+        elif strategy == GenerationSource.LLM and self._use_llm:
+            return self._generate_by_llm(
+                spec=spec,
+                spec_dict=spec_dict,
+                config=config,
+                design_name=design_name,
+            )
+        else:
+            return self._generate_by_template(
+                spec=spec,
+                config=config,
+                design_name=design_name,
+                protocol=protocol,
+            )
+    def _generate_by_retrieval(
+        self,
+        spec: DesignSpec,
+        spec_dict: Dict[str, Any],
+        config: PipelineConfig,
+        design_name: str,
+        protocol: str,
+    ) -> GenerationResult:
+        """Generate using retrieval-based adaptation."""
+        if not self._index or not self._extractor or not self._adapter:
+            return GenerationResult(source=GenerationSource.TEMPLATE)
+        features = self._extractor.extract(spec)
+        search_results = self._index.search(features, top_k=5)
+        if not search_results:
+            logger.info("No similar specs found in index, falling back to templates")
+            return GenerationResult(source=GenerationSource.TEMPLATE)
+        best_result = search_results[0]
+        best_spec = best_result.spec_dict
+        retrieval_info = RetrievalInfo(
+            used_similarity=True,
+            similar_specs=len(search_results),
+            best_score=best_result.similarity,
+            best_spec_name=best_result.design_name,
+            retrieval_strategy="similarity_search",
+        )
+        logger.info(
+            f"Best match: '{best_result.design_name}' "
+            f"(similarity: {best_result.similarity:.3f})"
+        )
+        if best_result.generated_files:
+            adaptation = self._adapter.adapt(
+                source_spec=best_spec,
+                target_spec=spec_dict,
+                source_files=best_result.generated_files,
+            )
+            retrieval_info.adaptation_score = adaptation.score
+            if adaptation.errors:
+                logger.warning(f"Adaptation errors: {adaptation.errors}")
+            if adaptation.score >= 0.7:
+                files = adaptation.adapted_files
+                validation_score = 0.5
+                if self._code_validator:
+                    report = self._code_validator.validate_files(files, design_name)
+                    validation_score = report.avg_score
+                    retrieval_info.pre_validation_score = validation_score
+                    if report.overall_passed or not self._strict_validation:
+                        return GenerationResult(
+                            files=files,
+                            source=GenerationSource.RETRIEVAL,
+                            retrieval_info=retrieval_info,
+                            validation_report=report,
+                            score=validation_score,
+                        )
+                    else:
+                        logger.warning(
+                            f"Retrieved code failed validation "
+                            f"({report.total_errors} errors), will try alternatives"
+                        )
+            else:
+                logger.warning(
+                    f"Adaptation score too low ({adaptation.score:.2f} < 0.7), "
+                    "falling back to alternatives"
+                )
+        if len(search_results) > 1:
+            for alt_result in search_results[1:3]:
+                if alt_result.generated_files and alt_result.similarity >= 0.5:
+                    logger.info(f"Trying alternative: '{alt_result.design_name}'")
+                    adaptation = self._adapter.adapt(
+                        source_spec=alt_result.spec_dict,
+                        target_spec=spec_dict,
+                        source_files=alt_result.generated_files,
+                    )
+                    if adaptation.score >= 0.7:
+                        files = adaptation.adapted_files
+                        if self._code_validator:
+                            report = self._code_validator.validate_files(files, design_name)
+                            if report.overall_passed or not self._strict_validation:
+                                retrieval_info.best_spec_name = alt_result.design_name
+                                retrieval_info.best_score = alt_result.similarity
+                                retrieval_info.adaptation_score = adaptation.score
+                                retrieval_info.pre_validation_score = report.avg_score
+                                return GenerationResult(
+                                    files=files,
+                                    source=GenerationSource.RETRIEVAL,
+                                    retrieval_info=retrieval_info,
+                                    validation_report=report,
+                                    score=report.avg_score,
+                                )
+        return GenerationResult(
+            source=GenerationSource.RETRIEVAL,
+            retrieval_info=retrieval_info,
+            errors=["Retrieval generation did not pass validation thresholds"],
+        )
+    def _generate_by_llm(
+        self,
+        spec: DesignSpec,
+        spec_dict: Dict[str, Any],
+        config: PipelineConfig,
+        design_name: str,
+    ) -> GenerationResult:
+        """Generate using LLM (placeholder for now)."""
+        logger.info("LLM generation requested but not fully implemented")
+        return GenerationResult(
+            source=GenerationSource.LLM,
+            errors=["LLM generation not available"],
+        )
+    def _generate_by_template(
+        self,
+        spec: DesignSpec,
+        config: PipelineConfig,
+        design_name: str,
+        protocol: str,
+    ) -> GenerationResult:
+        """Generate using templates."""
+        files = self._template_model.predict(spec, config)
+        score = 0.7
+        report = None
+        if self._code_validator:
+            report = self._code_validator.validate_files(files, design_name)
+            score = report.avg_score
+        return GenerationResult(
+            files=files,
+            source=GenerationSource.TEMPLATE,
+            validation_report=report,
+            score=score,
+        )
+    def _apply_validation_and_fallback(
+        self,
+        result: GenerationResult,
+        spec: DesignSpec,
+        config: PipelineConfig,
+        spec_dict: Dict[str, Any],
+        design_name: str,
+        protocol: str,
+    ) -> GenerationResult:
+        """Apply validation and use fallback if needed."""
+        if result.files and not result.errors:
+            return result
+        if result.source == GenerationSource.TEMPLATE and result.files:
+            return result
+        logger.warning(
+            f"Primary strategy ({result.source.value}) failed or not available, "
+            "falling back to template generation"
+        )
+        template_result = self._generate_by_template(
+            spec=spec,
+            config=config,
+            design_name=design_name,
+            protocol=protocol,
+        )
+        if result.retrieval_info:
+            template_result.retrieval_info = result.retrieval_info
+        template_result.warnings.extend([
+            f"Fell back from {result.source.value} to templates",
+        ])
+        if result.errors:
+            template_result.warnings.extend(result.errors)
+        return template_result
+    def _record_learning(
+        self,
+        final_result: GenerationResult,
+        spec_dict: Dict[str, Any],
+        design_name: str,
+        protocol: str,
+        selected_source: GenerationSource,
+    ) -> None:
+        """Record learning data for continuous improvement."""
+        if not self._use_learning:
+            return
+        score = final_result.score
+        passed = final_result.validation_report.overall_passed if final_result.validation_report else (score >= 0.7)
+        reward = 1.0 if passed else (-0.5 if not passed else 0.3)
+        used_source = (
+            final_result.source.value
+            if final_result.source != selected_source
+            else selected_source.value
+        )
+        if final_result.validation_report:
+            for file_result in final_result.validation_report.files:
+                if self._rl_learner:
+                    self._rl_learner.update(
+                        protocol=protocol,
+                        file_type=file_result.file_type,
+                        generation_source=used_source,
+                        reward=1.0 if file_result.passed else -0.3,
+                        spec_dict=spec_dict,
+                        metadata={
+                            "design_name": design_name,
+                            "score": file_result.score,
+                            "error_count": file_result.error_count,
+                        },
+                    )
+                if self._pattern_learner:
+                    if file_result.passed and file_result.score >= 0.7:
+                        self._pattern_learner.record_success(
+                            file_type=file_result.file_type,
+                            protocol=protocol,
+                            score=file_result.score,
+                        )
+                    else:
+                        for issue in file_result.issues:
+                            if issue.severity.value == "error":
+                                self._pattern_learner.record_error(
+                                    error_msg=issue.message,
+                                    file_type=file_result.file_type,
+                                    line_num=issue.line_number,
+                                )
+        history_entry = {
+            "timestamp": datetime.now().isoformat(),
+            "design_name": design_name,
+            "protocol": protocol,
+            "selected_source": selected_source.value,
+            "actual_source": final_result.source.value,
+            "score": score,
+            "passed": passed,
+            "reward": reward,
+            "error_count": (
+                final_result.validation_report.total_errors
+                if final_result.validation_report else 0
+            ),
+        }
+        self._generation_history.append(history_entry)
+        if len(self._generation_history) > 100:
+            self._generation_history = self._generation_history[-100:]
+        if self._rl_learner and len(self._generation_history) % 10 == 0:
+            replay_count = self._rl_learner.replay_experiences(batch_size=32)
+            logger.debug(f"Replayed {replay_count} experiences")
+        if self._learning_storage_path:
+            self._save_learning_state()
+    def _save_learning_state(self) -> None:
+        """Save learning state to storage."""
+        if not self._learning_storage_path:
+            return
+        try:
+            os.makedirs(os.path.dirname(self._learning_storage_path), exist_ok=True)
+            state = {
+                "saved_at": datetime.now().isoformat(),
+                "generation_history": self._generation_history[-500:],
+                "strategy_weights": {
+                    "retrieval": self._strategy_weights.retrieval_weight,
+                    "llm": self._strategy_weights.llm_weight,
+                    "template": self._strategy_weights.template_weight,
+                },
+            }
+            if self._rl_learner:
+                state["rl_learner"] = self._rl_learner.to_dict()
+            if self._pattern_learner:
+                state["pattern_learner"] = self._pattern_learner.to_dict()
+            with open(self._learning_storage_path, "w") as f:
+                json.dump(state, f, indent=2)
+            logger.info(f"Learning state saved to: {self._learning_storage_path}")
+        except Exception as e:
+            logger.warning(f"Could not save learning state: {e}")
+    def _load_learning_state(self) -> None:
+        """Load learning state from storage."""
+        if not self._learning_storage_path or not os.path.exists(self._learning_storage_path):
+            return
+        try:
+            with open(self._learning_storage_path, "r") as f:
+                state = json.load(f)
+            self._generation_history = state.get("generation_history", [])
+            weights = state.get("strategy_weights", {})
+            if weights:
+                self._strategy_weights = StrategyWeights(
+                    retrieval_weight=weights.get("retrieval", 0.4),
+                    llm_weight=weights.get("llm", 0.3),
+                    template_weight=weights.get("template", 0.3),
+                )
+            if "rl_learner" in state and self._rl_learner:
+                from src.models.advanced_rl_learner import AdvancedReinforcementLearner
+                self._rl_learner = AdvancedReinforcementLearner.from_dict(state["rl_learner"])
+            if "pattern_learner" in state and self._pattern_learner:
+                from src.models.advanced_pattern_learner import AdvancedPatternLearner
+                self._pattern_learner = AdvancedPatternLearner.from_dict(state["pattern_learner"])
+            logger.info(f"Learning state loaded from: {self._learning_storage_path}")
+        except Exception as e:
+            logger.warning(f"Could not load learning state: {e}")
+    def get_learning_stats(self) -> Dict[str, Any]:
+        """Get comprehensive learning statistics."""
+        stats = {
+            "total_generations": len(self._generation_history),
+            "strategy_weights": {
+                "retrieval": self._strategy_weights.retrieval_weight,
+                "llm": self._strategy_weights.llm_weight,
+                "template": self._strategy_weights.template_weight,
+            },
+        }
+        if self._generation_history:
+            recent = self._generation_history[-50:]
+            passed = sum(1 for h in recent if h.get("passed", False))
+            avg_score = sum(h.get("score", 0) for h in recent) / len(recent)
+            stats["recent_performance"] = {
+                "window_size": len(recent),
+                "pass_rate": passed / len(recent),
+                "avg_score": avg_score,
+            }
+            sources = [h.get("actual_source", "unknown") for h in recent]
+            stats["source_distribution"] = dict(Counter(sources))
+        if self._rl_learner:
+            stats["rl_learner"] = self._rl_learner.get_performance_stats()
+        if self._pattern_learner:
+            stats["pattern_learner"] = self._pattern_learner.get_suggestions(
+                file_type="any",
+                protocol="any",
+            )
+        return stats
+    @staticmethod
+    def _spec_to_dict(spec: DesignSpec) -> Dict[str, Any]:
+        """Convert DesignSpec to serializable dict."""
+        return {
+            "design_name": spec.design_name,
+            "protocol": spec.protocol,
+            "clock_reset": {
+                "clock": spec.clock_reset.clock,
+                "reset": spec.clock_reset.reset,
+                "reset_active": spec.clock_reset.reset_active,
+            },
+            "interfaces": [
+                {
+                    "name": iface.name,
+                    "signals": [
+                        {"name": s.name, "direction": s.direction, "width": s.width}
+                        for s in iface.signals
+                    ],
+                }
+                for iface in spec.interfaces
+            ],
+            "registers": [
+                {
+                    "name": r.name,
+                    "address": r.address,
+                    "access": r.access,
+                    "size": r.size,
+                    "reset_value": r.reset_value,
+                    "fields": [
+                        {"name": f.name, "bits": f.bits, "description": f.description}
+                        for f in r.fields
+                    ],
+                }
+                for r in spec.registers
+            ],
+        }
+    def save(self, path: str) -> None:
+        """Save the model state to disk."""
+        self.save_learning_state(path)
+        logger.info("Saved EnhancedMLGenerationModelV2 to %s", path)
+    @classmethod
+    def load(cls, path: str) -> "EnhancedMLGenerationModelV2":
+        """Load the model from disk."""
+        model = cls(
+            name="enhanced_ml_model_v2",
+            use_learning=True,
+        )
+        model.load_learning_state(path)
+        logger.info("Loaded EnhancedMLGenerationModelV2 from %s", path)
+        return model
+    @property
+    def is_trained(self) -> bool:
+        """Check if model is trained."""
+        if self._index is not None:
+            return len(self._index) > 0
+        return False
+    @property
+    def index(self) -> Optional[SimilarityIndex]:
+        """Get the similarity index."""
+        return self._index

src/pipeline.py CHANGED Viewed

@@ -13,6 +13,7 @@ from src.features.extractors import SpecFeatureExtractor
 from src.generation.engine import GenerationEngine
 from src.models.base_model import GenerationModel
 from src.models.enhanced_ml_model import EnhancedMLGenerationModel
 from src.models.ml_generation_model import MLGenerationModel, MLModelConfig
 from src.models.registry import ModelRegistry
 from src.models.template_model import TemplateModel
@@ -55,7 +56,7 @@ class TBPipeline:
         model_type = ml_cfg.model_type
         self.logger.info("ML generation enabled, model_type=%s", model_type)
-        if model_type in ("ml", "hybrid", "llm", "semantic"):
             ml_model_config = MLModelConfig(
                 similarity_threshold=ml_cfg.similarity_threshold,
                 auto_learn=ml_cfg.auto_learn,
@@ -63,18 +64,34 @@ class TBPipeline:
                 top_k_retrieval=ml_cfg.top_k_retrieval,
                 fallback_to_templates=ml_cfg.fallback_to_templates,
             )
-            model = EnhancedMLGenerationModel(
-                name="enhanced_ml_model",
-                config=ml_model_config,
-                templates_dir=self.cfg.generation.templates_dir,
-                strict_validation=True,
-                use_llm=ml_cfg.use_llm,
-                use_semantic_encoder=ml_cfg.use_semantic_encoder,
-                use_learning=ml_cfg.use_learning,
-                llm_model_name=ml_cfg.llm_model_name,
-                learning_storage_path=ml_cfg.learning_storage_path,
-            )
-            self.logger.info("Created EnhancedMLGenerationModel with index size: %d", len(model.index))
             if model_type == "llm":
                 self.logger.info("LLM mode: will prioritize LLM generation")

 from src.generation.engine import GenerationEngine
 from src.models.base_model import GenerationModel
 from src.models.enhanced_ml_model import EnhancedMLGenerationModel
+from src.models.enhanced_ml_model_v2 import EnhancedMLGenerationModelV2
 from src.models.ml_generation_model import MLGenerationModel, MLModelConfig
 from src.models.registry import ModelRegistry
 from src.models.template_model import TemplateModel
         model_type = ml_cfg.model_type
         self.logger.info("ML generation enabled, model_type=%s", model_type)
+        if model_type in ("ml", "hybrid", "llm", "semantic", "v2"):
             ml_model_config = MLModelConfig(
                 similarity_threshold=ml_cfg.similarity_threshold,
                 auto_learn=ml_cfg.auto_learn,
                 top_k_retrieval=ml_cfg.top_k_retrieval,
                 fallback_to_templates=ml_cfg.fallback_to_templates,
             )
+            if model_type == "v2":
+                model = EnhancedMLGenerationModelV2(
+                    name="enhanced_ml_model_v2",
+                    config=ml_model_config,
+                    templates_dir=self.cfg.generation.templates_dir,
+                    strict_validation=True,
+                    use_llm=ml_cfg.use_llm,
+                    use_semantic_encoder=ml_cfg.use_semantic_encoder,
+                    use_learning=ml_cfg.use_learning,
+                    llm_model_name=ml_cfg.llm_model_name,
+                    learning_storage_path=ml_cfg.learning_storage_path,
+                    exploration_strategy=getattr(ml_cfg, 'exploration_strategy', 'ucb'),
+                )
+                self.logger.info("Created EnhancedMLGenerationModelV2 with advanced RL and pattern learning")
+            else:
+                model = EnhancedMLGenerationModel(
+                    name="enhanced_ml_model",
+                    config=ml_model_config,
+                    templates_dir=self.cfg.generation.templates_dir,
+                    strict_validation=True,
+                    use_llm=ml_cfg.use_llm,
+                    use_semantic_encoder=ml_cfg.use_semantic_encoder,
+                    use_learning=ml_cfg.use_learning,
+                    llm_model_name=ml_cfg.llm_model_name,
+                    learning_storage_path=ml_cfg.learning_storage_path,
+                )
+                self.logger.info("Created EnhancedMLGenerationModel with index size: %d", len(model.index))
             if model_type == "llm":
                 self.logger.info("LLM mode: will prioritize LLM generation")

streamlit_app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-Streamlit UI for UVM Testbench Generator
-Deploy to: https://share.streamlit.io/
 """
 import streamlit as st
@@ -11,20 +11,18 @@ import zipfile
 import io
 from pathlib import Path
 from datetime import datetime
-# Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("uvmgen-streamlit")
-# Page config
 st.set_page_config(
-    page_title="UVM Testbench Generator",
     page_icon="🔬",
     layout="wide",
     initial_sidebar_state="expanded",
 )
-# Example specifications
 EXAMPLES = {
     "UART": """design_name: uart
 clock_reset:
@@ -58,20 +56,50 @@ interfaces:
         direction: output
       - name: uart_rx
         direction: input
 registers:
   - name: RBR_THR
     address: 0x0
     description: Receiver Buffer / Transmitter Holding
   - name: IER
     address: 0x1
     description: Interrupt Enable
   - name: LCR
     address: 0x3
     description: Line Control
   - name: LSR
     address: 0x5
     description: Line Status
 protocol: uart""",
     "SPI": """design_name: spi_controller
@@ -181,239 +209,514 @@ registers:
 protocol: i2c"""
 }
-# Session state
 if 'last_result' not in st.session_state:
     st.session_state.last_result = None
 if 'generated_files' not in st.session_state:
     st.session_state.generated_files = {}
 if 'log_output' not in st.session_state:
     st.session_state.log_output = []
-# Header
 st.title("🔬 UVM Testbench Generator")
 st.markdown("""
-**AI-Powered Semiconductor Verification Pipeline**
-Generate industry-grade UVM testbenches from YAML specifications with protocol libraries, coverage-driven auto-training, and CI/CD integration.
 """)
-# Sidebar
 with st.sidebar:
     st.header("⚙️ Configuration")
-    # Protocol selector
-    selected_protocol = st.selectbox(
-        "Select Protocol Example",
-        list(EXAMPLES.keys()),
-        index=0
-    )
-    # Design name
-    default_name = selected_protocol.lower() + "_controller"
-    design_name = st.text_input(
-        "Design Name",
-        value=default_name
-    )
     st.divider()
-    # Options
-    st.subheader("Options")
-    use_ml = st.checkbox(
-        "Enable AI/ML Features",
-        value=True,
-        help="Use semantic embeddings and learning (when dependencies available)"
-    )
-    auto_train = st.checkbox(
-        "Enable Auto-Training",
-        value=False,
-        help="Coverage-driven iterative improvement"
-    )
-    max_iterations = st.slider(
-        "Max Iterations",
-        min_value=1,
-        max_value=10,
-        value=1
-    )
     st.divider()
-    st.info("💡 UVM = Universal Verification Methodology")
-    st.caption(f"Developed by **Sai Kumar Taraka**")
-# Main content
-col1, col2 = st.columns([1, 1])
-with col1:
-    st.subheader("📝 Specification")
-    # Spec editor
-    spec_text = st.text_area(
-        "YAML Specification",
-        value=EXAMPLES[selected_protocol],
-        height=400,
-        key="spec_editor",
-        help="Edit the YAML specification for your design"
-    )
-    # Generate button
     generate_btn = st.button(
         "🚀 Generate UVM Testbench",
         type="primary",
-        use_container_width=True
     )
-with col2:
-    st.subheader("📊 Results & Output")
-    # Status
     status_placeholder = st.empty()
-    # Metrics
     metrics_placeholder = st.empty()
-    # Logs
     with st.expander("📋 Log Output", expanded=True):
         log_placeholder = st.empty()
-    # Files
     files_placeholder = st.empty()
-# Generate logic
 if generate_btn:
     st.session_state.log_output = []
     st.session_state.last_result = None
     st.session_state.generated_files = {}
     status_placeholder.info("🔄 Generating UVM testbench...")
     try:
-        # Import here for lazy loading
         from src.config import ConfigLoader, PipelineConfig
         from src.pipeline import TBPipeline
-        # Save spec to temp file
         with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False, encoding='utf-8') as f:
             f.write(spec_text)
             spec_path = f.name
-        st.session_state.log_output.append(f"[{datetime.now().strftime('%H:%M:%S')}] Starting generation for: {design_name}")
         log_placeholder.code("\n".join(st.session_state.log_output))
-        # Create pipeline
         pipeline = TBPipeline()
-        pipeline.cfg.ml.enabled = use_ml
-        pipeline.cfg.ml.model_type = "hybrid"
-        pipeline.cfg.ml.use_llm = use_ml
-        pipeline.cfg.ml.use_semantic_encoder = use_ml
-        pipeline.cfg.ml.use_learning = use_ml
         pipeline.cfg.auto_train.enabled = auto_train
         pipeline.cfg.auto_train.max_iterations = max_iterations
-        st.session_state.log_output.append(f"[{datetime.now().strftime('%H:%M:%S')}] ML enabled: {use_ml}")
-        st.session_state.log_output.append(f"[{datetime.now().strftime('%H:%M:%S')}] Auto-train: {auto_train} (iterations: {max_iterations})")
-        log_placeholder.code("\n".join(st.session_state.log_output))
-        # Run pipeline
         result = pipeline.run(spec_path)
-        # Cleanup
         try:
             os.unlink(spec_path)
         except:
             pass
-        # Store results
         st.session_state.last_result = result
         st.session_state.generated_files = result.get('generated_files', {})
-        st.session_state.log_output.append(f"[{datetime.now().strftime('%H:%M:%S')}] Generation complete!")
-        st.session_state.log_output.append(f"[{datetime.now().strftime('%H:%M:%S')}] Files generated: {len(st.session_state.generated_files)}")
         log_placeholder.code("\n".join(st.session_state.log_output))
-        # Update status
         if result.get('passed'):
             status_placeholder.success("✅ Generation successful!")
         else:
             status_placeholder.warning("⚠️ Generation completed with issues")
     except Exception as e:
-        st.session_state.log_output.append(f"[{datetime.now().strftime('%H:%M:%S')}] ERROR: {str(e)}")
         log_placeholder.code("\n".join(st.session_state.log_output))
         status_placeholder.error(f"❌ Error: {str(e)}")
         import traceback
         st.session_state.log_output.append(traceback.format_exc())
         log_placeholder.code("\n".join(st.session_state.log_output))
-# Show results
 if st.session_state.last_result:
-    result = st.session_state.last_result
-    # Metrics
-    with metrics_placeholder.container():
-        eval_metrics = result.get('evaluation', {})
-        m1, m2, m3 = st.columns(3)
-        with m1:
-            completeness = eval_metrics.get('completeness', 0) * 100
-            st.metric("Completeness", f"{completeness:.1f}%")
-        with m2:
-            signal_cov = eval_metrics.get('interface_signal_coverage', 0) * 100
-            st.metric("Signal Coverage", f"{signal_cov:.1f}%")
-        with m3:
-            reg_cov = eval_metrics.get('register_coverage', 0) * 100
-            st.metric("Register Coverage", f"{reg_cov:.1f}%")
-        m4, m5 = st.columns(2)
-        with m4:
-            st.metric("Files Generated", len(st.session_state.generated_files))
-        with m5:
-            st.metric("Iterations", result.get('auto_train_iterations', 0))
-    # Files list
-    with files_placeholder.expander("📄 Generated Files", expanded=True):
-        if st.session_state.generated_files:
-            # File selector
-            file_names = sorted(st.session_state.generated_files.keys())
-            selected_file = st.selectbox("Select file to preview", file_names)
-            if selected_file:
-                file_path = st.session_state.generated_files[selected_file]
-                if os.path.exists(file_path):
-                    try:
-                        with open(file_path, 'r', encoding='utf-8') as f:
-                            content = f.read()
-                        st.code(content, language='systemverilog')
-                    except Exception as e:
-                        st.warning(f"Could not read file: {e}")
-        # Download ZIP
-        if st.session_state.generated_files:
-            zip_buffer = io.BytesIO()
-            with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
-                for name, path in st.session_state.generated_files.items():
-                    if os.path.exists(path):
-                        zipf.write(path, arcname=name)
-            zip_buffer.seek(0)
-            st.download_button(
-                label="📥 Download All Files as ZIP",
-                data=zip_buffer,
-                file_name=f"{design_name}_uvm_testbench.zip",
-                mime="application/zip",
-                use_container_width=True,
-                type="secondary"
-            )
-# Footer
 st.divider()
-st.caption("""
-**UVM Testbench Generator** • AI-Powered by Sai Kumar Taraka
-Protocol Libraries: UART, SPI, I2C, AXI4-Lite, APB, Wishbone • Coverage-Driven Auto-Training
-""")

 """
+Enhanced Streamlit UI for UVM Testbench Generator
+Shows advanced ML capabilities: V2 model, RL strategies, learning persistence, etc.
 """
 import streamlit as st
 import io
 from pathlib import Path
 from datetime import datetime
+import json
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("uvmgen-streamlit")
 st.set_page_config(
+    page_title="UVM Testbench Generator - AI/ML Enhanced",
     page_icon="🔬",
     layout="wide",
     initial_sidebar_state="expanded",
 )
 EXAMPLES = {
     "UART": """design_name: uart
 clock_reset:
         direction: output
       - name: uart_rx
         direction: input
+      - name: cts_n
+        direction: input
+      - name: rts_n
+        direction: output
+      - name: uart_intr
+        direction: output
 registers:
   - name: RBR_THR
     address: 0x0
     description: Receiver Buffer / Transmitter Holding
+    fields:
+      - name: data
+        bits: 7:0
   - name: IER
     address: 0x1
     description: Interrupt Enable
+    fields:
+      - name: erbfi
+        bits: '0'
+        description: Enable RX data available interrupt
+      - name: etbei
+        bits: '1'
+        description: Enable TX holding register empty interrupt
   - name: LCR
     address: 0x3
     description: Line Control
+    fields:
+      - name: wls
+        bits: 1:0
+        description: Word length select
+      - name: dlab
+        bits: '7'
+        description: Divisor latch access bit
   - name: LSR
     address: 0x5
     description: Line Status
+    fields:
+      - name: dr
+        bits: '0'
+        description: Data Ready
+      - name: thre
+        bits: '5'
+        description: TX Holding Register Empty
 protocol: uart""",
     "SPI": """design_name: spi_controller
 protocol: i2c"""
 }
+MODEL_TYPES = {
+    "template": "Template Only (Fast, No Learning)",
+    "hybrid": "Hybrid ML (Retrieval + Templates)",
+    "v2": "Advanced ML V2 (Recommended) - RL + Pattern Learning",
+}
+EXPLORATION_STRATEGIES = {
+    "ucb": "UCB1 (Upper Confidence Bound) - Best for exploration/exploitation balance",
+    "epsilon_greedy": "Epsilon-Greedy - Simple, with decaying randomness",
+    "softmax": "Softmax (Boltzmann) - Probabilistic based on Q-values",
+    "thompson": "Thompson Sampling - Bayesian approach with Beta distributions",
+}
 if 'last_result' not in st.session_state:
     st.session_state.last_result = None
 if 'generated_files' not in st.session_state:
     st.session_state.generated_files = {}
 if 'log_output' not in st.session_state:
     st.session_state.log_output = []
+if 'ml_stats' not in st.session_state:
+    st.session_state.ml_stats = None
+if 'learning_state_path' not in st.session_state:
+    st.session_state.learning_state_path = None
 st.title("🔬 UVM Testbench Generator")
 st.markdown("""
+**AI-Powered Semiconductor Verification Pipeline with Advanced ML**
+Generate industry-grade UVM testbenches from YAML specifications. Now featuring:
+- **Advanced ML V2** with Reinforcement Learning (UCB, Softmax, Thompson Sampling)
+- **Experience Replay Buffer** (10,000 capacity)
+- **Eligibility Traces** for better credit assignment
+- **Pattern Mining** with N-grams and Association Rules
+- **Deep UVM Compliance Validation** (factory registration, phases, TLM)
+- **Continuous Learning** with state persistence
 """)
 with st.sidebar:
     st.header("⚙️ Configuration")
+    with st.expander("📋 Quick Setup", expanded=True):
+        selected_protocol = st.selectbox(
+            "Protocol Example",
+            list(EXAMPLES.keys()),
+            index=0,
+            help="Select a pre-built protocol specification"
+        )
+        default_name = selected_protocol.lower() + "_controller"
+        design_name = st.text_input(
+            "Design Name",
+            value=default_name,
+            help="Name for your generated IP"
+        )
     st.divider()
+    with st.expander("🤖 ML Configuration", expanded=True):
+        use_ml = st.checkbox(
+            "Enable AI/ML Features",
+            value=True,
+            help="Use machine learning for intelligent generation"
+        )
+        if use_ml:
+            model_type = st.selectbox(
+                "ML Model Version",
+                list(MODEL_TYPES.keys()),
+                index=2,
+                format_func=lambda k: MODEL_TYPES[k],
+                help="V2 is recommended for advanced learning"
+            )
+            if model_type == "v2":
+                exploration_strategy = st.selectbox(
+                    "RL Exploration Strategy",
+                    list(EXPLORATION_STRATEGIES.keys()),
+                    index=0,
+                    format_func=lambda k: EXPLORATION_STRATEGIES[k].split(" - ")[0],
+                    help="How the RL agent balances exploration and exploitation"
+                )
+                st.caption(EXPLORATION_STRATEGIES[exploration_strategy])
+                persist_learning = st.checkbox(
+                    "Persist Learning State",
+                    value=True,
+                    help="Save and load learned patterns between sessions"
+                )
+                if persist_learning:
+                    st.session_state.learning_state_path = os.path.join(
+                        tempfile.gettempdir(),
+                        "uvmgen_learning_state.json"
+                    )
+                    st.caption(f"State will be saved to: temporary directory")
+            strict_validation = st.checkbox(
+                "Strict UVM Compliance",
+                value=True,
+                help="Enforce deep UVM validation (factory, phases, TLM)"
+            )
+            auto_learn = st.checkbox(
+                "Continuous Learning",
+                value=True,
+                help="Learn from each generation to improve future results"
+            )
+        else:
+            model_type = "template"
+            exploration_strategy = "ucb"
+            strict_validation = False
+            auto_learn = False
+    st.divider()
+    with st.expander("⚡ Generation Options"):
+        auto_train = st.checkbox(
+            "Coverage-Driven Auto-Training",
+            value=False,
+            help="Iteratively improve testbench based on coverage analysis"
+        )
+        max_iterations = st.slider(
+            "Max Iterations",
+            min_value=1,
+            max_value=10,
+            value=1,
+            help="Maximum auto-training iterations"
+        )
+        st.caption("Auto-training requires a simulator (Icarus Verilog, VCS, or Questa)")
     st.divider()
+    with st.expander("ℹ️ About"):
+        st.info("💡 **UVM = Universal Verification Methodology**")
+        st.info("🔬 **ML V2 = Reinforcement Learning + Pattern Mining**")
+        st.markdown("---")
+        st.caption("Developed by **Sai Kumar Taraka**")
+        st.caption("Promotion-Ready Advanced ML System")
+tab_spec, tab_results, tab_ml_insights = st.tabs([
+    "📝 Specification",
+    "📊 Results & Files",
+    "🤖 ML Insights"
+])
+with tab_spec:
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.subheader("✏️ YAML Specification Editor")
+        spec_text = st.text_area(
+            "Edit your specification",
+            value=EXAMPLES[selected_protocol],
+            height=450,
+            key="spec_editor",
+            help="Define your interfaces, signals, registers, and protocol"
+        )
+        st.caption(f"Protocol: {selected_protocol} | Model: {model_type.upper()} | Strategy: {exploration_strategy.upper()}")
+    with col2:
+        st.subheader("📋 Specification Summary")
+        import yaml
+        try:
+            spec_dict = yaml.safe_load(spec_text)
+            st.metric("Design Name", spec_dict.get('design_name', 'unknown'))
+            st.metric("Protocol", spec_dict.get('protocol', 'unknown').upper())
+            col_a, col_b = st.columns(2)
+            with col_a:
+                interfaces = spec_dict.get('interfaces', [])
+                st.metric("Interfaces", len(interfaces))
+                total_signals = sum(len(i.get('signals', [])) for i in interfaces)
+                st.metric("Total Signals", total_signals)
+            with col_b:
+                registers = spec_dict.get('registers', [])
+                st.metric("Registers", len(registers))
+                total_fields = sum(len(r.get('fields', [])) for r in registers)
+                st.metric("Register Fields", total_fields)
+            if interfaces:
+                st.subheader("Interface Signals")
+                for iface in interfaces:
+                    with st.expander(f"🔌 {iface.get('name', 'unknown')}"):
+                        signals = iface.get('signals', [])
+                        for sig in signals:
+                            name = sig.get('name', 'unknown')
+                            direction = sig.get('direction', 'input')
+                            width = sig.get('width', 1)
+                            st.text(f"  • {name} ({direction}, {width}bit)")
+            if registers:
+                st.subheader("Register Map")
+                for reg in registers:
+                    with st.expander(f"📋 {reg.get('name', 'unknown')} @ {reg.get('address', '0x0')}"):
+                        st.text(f"  Description: {reg.get('description', 'None')}")
+                        fields = reg.get('fields', [])
+                        if fields:
+                            st.text(f"  Fields:")
+                            for field in fields:
+                                st.text(f"    • {field.get('name', 'unknown')} [{field.get('bits', '0')}]")
+        except Exception as e:
+            st.error(f"Invalid YAML: {e}")
+    st.divider()
     generate_btn = st.button(
         "🚀 Generate UVM Testbench",
         type="primary",
+        use_container_width=True,
+        help=f"Generate using {model_type.upper()} model"
     )
+with tab_results:
     status_placeholder = st.empty()
     metrics_placeholder = st.empty()
     with st.expander("📋 Log Output", expanded=True):
         log_placeholder = st.empty()
     files_placeholder = st.empty()
+with tab_ml_insights:
+    st.header("🤖 Advanced ML Insights")
+    if st.session_state.ml_stats:
+        stats = st.session_state.ml_stats
+        col1, col2 = st.columns(2)
+        with col1:
+            st.subheader("📊 Learning Statistics")
+            total_gen = stats.get('total_generations', 0)
+            st.metric("Total Generations", total_gen)
+            if 'recent_performance' in stats:
+                perf = stats['recent_performance']
+                st.metric("Recent Pass Rate", f"{perf.get('pass_rate', 0)*100:.1f}%")
+                st.metric("Avg Score", f"{perf.get('avg_score', 0):.3f}")
+            if 'rl_learner' in stats:
+                rl_stats = stats['rl_learner']
+                st.subheader("🎮 Reinforcement Learning")
+                st.metric("Episode Count", rl_stats.get('episode_count', 0))
+                st.metric("Total Updates", rl_stats.get('total_updates', 0))
+                st.metric("Learning Rate", f"{rl_stats.get('learning_rate', 0.1):.4f}")
+                if 'state_stats' in rl_stats:
+                    st.subheader("📈 Strategy Performance")
+                    state_stats = rl_stats['state_stats']
+                    for state, info in list(state_stats.items())[:5]:
+                        st.text(f"  {state}: best='{info.get('best_action', 'unknown')}' (Q={info.get('best_q_value', 0):.3f})")
+        with col2:
+            st.subheader("🎯 Source Distribution")
+            if 'source_distribution' in stats:
+                source_dist = stats['source_distribution']
+                fig_data = {
+                    'Source': list(source_dist.keys()),
+                    'Count': list(source_dist.values())
+                }
+                st.bar_chart(fig_data, x='Source', y='Count')
+            st.subheader("⚖️ Strategy Weights")
+            if 'strategy_weights' in stats:
+                weights = stats['strategy_weights']
+                st.json(weights)
+            if 'pattern_learner' in stats:
+                st.subheader("🔍 Pattern Learner")
+                patterns = stats['pattern_learner']
+                if 'common_errors' in patterns:
+                    st.text("Common Error Patterns:")
+                    for err, count in patterns['common_errors'][:5]:
+                        st.text(f"  • {err}: {count} occurrences")
+                if 'recommendations' in patterns:
+                    st.subheader("💡 Recommendations")
+                    for rec in patterns['recommendations'][:5]:
+                        st.info(rec)
+        st.divider()
+        col_a, col_b = st.columns(2)
+        with col_a:
+            if st.button("📥 Export Learning State"):
+                if st.session_state.learning_state_path and os.path.exists(st.session_state.learning_state_path):
+                    with open(st.session_state.learning_state_path, 'r') as f:
+                        state_data = f.read()
+                    st.download_button(
+                        "Download Learning State JSON",
+                        data=state_data,
+                        file_name="uvmgen_learning_state.json",
+                        mime="application/json"
+                    )
+                else:
+                    st.warning("No learning state saved yet")
+        with col_b:
+            uploaded_file = st.file_uploader("📤 Import Learning State", type="json")
+            if uploaded_file is not None:
+                try:
+                    state_data = json.load(uploaded_file)
+                    if st.session_state.learning_state_path:
+                        with open(st.session_state.learning_state_path, 'w') as f:
+                            json.dump(state_data, f, indent=2)
+                        st.success("Learning state imported! It will be loaded on next generation.")
+                except Exception as e:
+                    st.error(f"Failed to import: {e}")
+    else:
+        st.info("Run a generation first to see ML insights.")
+        st.markdown("""
+        ### What you'll see here:
+        - **Learning Statistics**: Total generations, pass rates, average scores
+        - **RL Metrics**: Episode counts, learning rates, strategy performance
+        - **Pattern Analysis**: Common error patterns and recommendations
+        - **Strategy Distribution**: Which generation sources work best
+        - **Import/Export**: Save and load learned state
+        ### ML V2 Capabilities:
+        1. **Reinforcement Learning** with 4 exploration strategies
+        2. **Experience Replay** buffer (10,000 capacity)
+        3. **Eligibility Traces** for better credit assignment
+        4. **Pattern Mining** with N-grams and Association Rules
+        5. **Deep UVM Validation** for factory registration, phases, TLM connections
+        """)
 if generate_btn:
     st.session_state.log_output = []
     st.session_state.last_result = None
     st.session_state.generated_files = {}
+    st.session_state.ml_stats = None
     status_placeholder.info("🔄 Generating UVM testbench...")
     try:
         from src.config import ConfigLoader, PipelineConfig
         from src.pipeline import TBPipeline
         with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False, encoding='utf-8') as f:
             f.write(spec_text)
             spec_path = f.name
+        timestamp = datetime.now().strftime('%H:%M:%S')
+        st.session_state.log_output.append(f"[{timestamp}] Starting generation for: {design_name}")
+        st.session_state.log_output.append(f"[{timestamp}] Model: {model_type}")
+        if model_type == "v2":
+            st.session_state.log_output.append(f"[{timestamp}] RL Strategy: {exploration_strategy}")
+        st.session_state.log_output.append(f"[{timestamp}] ML Enabled: {use_ml}")
+        st.session_state.log_output.append(f"[{timestamp}] Strict Validation: {strict_validation}")
         log_placeholder.code("\n".join(st.session_state.log_output))
         pipeline = TBPipeline()
+        if use_ml:
+            pipeline.cfg.ml.enabled = True
+            pipeline.cfg.ml.model_type = model_type
+            pipeline.cfg.ml.use_llm = False
+            pipeline.cfg.ml.use_semantic_encoder = False
+            pipeline.cfg.ml.use_learning = auto_learn
+            pipeline.cfg.ml.strict_validation = strict_validation
+            if model_type == "v2":
+                pipeline.cfg.ml.exploration_strategy = exploration_strategy
+                if st.session_state.learning_state_path:
+                    pipeline.cfg.ml.learning_storage_path = st.session_state.learning_state_path
+        else:
+            pipeline.cfg.ml.enabled = False
         pipeline.cfg.auto_train.enabled = auto_train
         pipeline.cfg.auto_train.max_iterations = max_iterations
         result = pipeline.run(spec_path)
         try:
             os.unlink(spec_path)
         except:
             pass
         st.session_state.last_result = result
         st.session_state.generated_files = result.get('generated_files', {})
+        try:
+            if hasattr(pipeline.model, 'get_learning_stats'):
+                st.session_state.ml_stats = pipeline.model.get_learning_stats()
+            elif hasattr(pipeline.model, '_rl_learner') and hasattr(pipeline.model, '_pattern_learner'):
+                st.session_state.ml_stats = {
+                    'total_generations': len(st.session_state.log_output),
+                    'rl_learner': pipeline.model._rl_learner.get_performance_stats() if hasattr(pipeline.model._rl_learner, 'get_performance_stats') else {},
+                }
+        except Exception as e:
+            logger.warning(f"Could not get ML stats: {e}")
+        timestamp = datetime.now().strftime('%H:%M:%S')
+        st.session_state.log_output.append(f"[{timestamp}] Generation complete!")
+        st.session_state.log_output.append(f"[{timestamp}] Files generated: {len(st.session_state.generated_files)}")
+        if result.get('passed'):
+            st.session_state.log_output.append(f"[{timestamp}] Status: PASSED ✅")
+        else:
+            st.session_state.log_output.append(f"[{timestamp}] Status: COMPLETED WITH WARNINGS ⚠️")
         log_placeholder.code("\n".join(st.session_state.log_output))
         if result.get('passed'):
             status_placeholder.success("✅ Generation successful!")
         else:
             status_placeholder.warning("⚠️ Generation completed with issues")
     except Exception as e:
+        timestamp = datetime.now().strftime('%H:%M:%S')
+        st.session_state.log_output.append(f"[{timestamp}] ERROR: {str(e)}")
         log_placeholder.code("\n".join(st.session_state.log_output))
         status_placeholder.error(f"❌ Error: {str(e)}")
         import traceback
         st.session_state.log_output.append(traceback.format_exc())
         log_placeholder.code("\n".join(st.session_state.log_output))
 if st.session_state.last_result:
+    with tab_results:
+        result = st.session_state.last_result
+        with metrics_placeholder.container():
+            eval_metrics = result.get('evaluation', {})
+            m1, m2, m3, m4 = st.columns(4)
+            with m1:
+                completeness = eval_metrics.get('completeness', 0) * 100
+                st.metric("Completeness", f"{completeness:.1f}%")
+            with m2:
+                signal_cov = eval_metrics.get('interface_signal_coverage', 0) * 100
+                st.metric("Signal Coverage", f"{signal_cov:.1f}%")
+            with m3:
+                reg_cov = eval_metrics.get('register_coverage', 0) * 100
+                st.metric("Register Coverage", f"{reg_cov:.1f}%")
+            with m4:
+                st.metric("Files Generated", len(st.session_state.generated_files))
+            m5, m6 = st.columns(2)
+            with m5:
+                st.metric("Auto-Train Iterations", result.get('auto_train_iterations', 0))
+            with m6:
+                if result.get('passed'):
+                    st.metric("Status", "✅ PASSED")
+                else:
+                    st.metric("Status", "⚠️ WARNINGS")
+        with files_placeholder.expander("📄 Generated Files", expanded=True):
+            if st.session_state.generated_files:
+                file_names = sorted(st.session_state.generated_files.keys())
+                selected_file = st.selectbox("Select file to preview", file_names, key="file_selector")
+                if selected_file:
+                    file_path = st.session_state.generated_files[selected_file]
+                    if os.path.exists(file_path):
+                        try:
+                            with open(file_path, 'r', encoding='utf-8') as f:
+                                content = f.read()
+                            st.code(content, language='systemverilog')
+                            col1, col2 = st.columns([1, 1])
+                            with col1:
+                                st.download_button(
+                                    f"📥 Download {selected_file}",
+                                    data=content,
+                                    file_name=selected_file,
+                                    mime="text/plain",
+                                    use_container_width=True
+                                )
+                            with col2:
+                                st.info(f"Lines: {len(content.splitlines())} | Size: {len(content)} bytes")
+                        except Exception as e:
+                            st.warning(f"Could not read file: {e}")
+            if st.session_state.generated_files:
+                zip_buffer = io.BytesIO()
+                with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                    for name, path in st.session_state.generated_files.items():
+                        if os.path.exists(path):
+                            zipf.write(path, arcname=name)
+                zip_buffer.seek(0)
+                st.download_button(
+                    label="📦 Download All Files as ZIP",
+                    data=zip_buffer,
+                    file_name=f"{design_name}_uvm_testbench.zip",
+                    mime="application/zip",
+                    use_container_width=True,
+                    type="primary"
+                )
 st.divider()
+footer_col1, footer_col2, footer_col3 = st.columns([1, 2, 1])
+with footer_col2:
+    st.caption("""
+    **UVM Testbench Generator v2.0** • AI-Powered by **Sai Kumar Taraka**
+    🔬 Advanced ML: RL (UCB/Softmax/Thompson) + Pattern Mining + Experience Replay + Eligibility Traces
+    📚 Protocol Libraries: UART, SPI, I2C, AXI4-Lite, APB, Wishbone
+    🎯 Deep UVM Validation: Factory Registration, Phases, TLM Connections, Coverage
+    """)

tests/quick_v2_test.py ADDED Viewed

	@@ -0,0 +1,130 @@

+"""
+Quick smoke test for V2 ML model - final version
+"""
+import sys
+import os
+repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, repo_root)
+from src.config import ConfigLoader, PipelineConfig, MLConfig, GenerationConfig, AutoTrainConfig
+from src.pipeline import TBPipeline
+spec_path = os.path.join(repo_root, "configs", "uart_demo.yaml")
+print("="*60)
+print("V2 ML Model Smoke Test")
+print("="*60)
+print("\n1. Creating pipeline config with V2 model (UCB strategy)...")
+ml_cfg = MLConfig(
+    enabled=True,
+    model_type="v2",
+    exploration_strategy="ucb",
+    use_llm=False,
+    use_semantic_encoder=False,
+    use_learning=True,
+    strict_validation=True
+)
+pipeline_cfg = PipelineConfig(
+    ml=ml_cfg,
+    generation=GenerationConfig(
+        templates_dir=os.path.join(repo_root, "src", "generation", "templates"),
+        output_dir=os.path.join(repo_root, "output"),
+        overwrite=True
+    ),
+    auto_train=AutoTrainConfig(
+        enabled=False,
+        max_iterations=1
+    )
+)
+print(f"   ML enabled: {pipeline_cfg.ml.enabled}")
+print(f"   Model type: {pipeline_cfg.ml.model_type}")
+print(f"   Exploration strategy: {pipeline_cfg.ml.exploration_strategy}")
+print(f"   Strict validation: {pipeline_cfg.ml.strict_validation}")
+print(f"   Auto-train: {pipeline_cfg.auto_train.enabled}")
+print("\n2. Creating pipeline with V2 model...")
+pipeline = TBPipeline(pipeline_cfg)
+print(f"   Model type: {type(pipeline.model).__name__}")
+print("\n3. Running generation with UART demo spec...")
+result = pipeline.run(spec_path)
+print(f"\n   Result passed: {result.get('passed', False)}")
+print(f"   Files generated: {len(result.get('generated_files', {}))}")
+print(f"   Auto-train iterations: {result.get('auto_train_iterations', 0)}")
+if result.get('passed'):
+    print("\n   [OK] Generation PASSED")
+else:
+    print("\n   [WARNING] Generation had issues")
+if result.get('generated_files'):
+    print("\n4. Generated files:")
+    for name, path in result['generated_files'].items():
+        if os.path.exists(path):
+            size = os.path.getsize(path)
+            print(f"   - {name}: {size} bytes")
+if hasattr(pipeline.model, 'get_learning_stats'):
+    print("\n5. ML Learning Stats:")
+    stats = pipeline.model.get_learning_stats()
+    print(f"   - Total generations: {stats.get('total_generations', 0)}")
+    if 'source_distribution' in stats:
+        print(f"   - Source distribution: {stats['source_distribution']}")
+    if 'strategy_weights' in stats:
+        print(f"   - Strategy weights: {stats['strategy_weights']}")
+    if 'rl_learner' in stats:
+        rl = stats['rl_learner']
+        print(f"   - RL episodes: {rl.get('episode_count', 0)}")
+        print(f"   - RL total updates: {rl.get('total_updates', 0)}")
+        print(f"   - RL learning rate: {rl.get('learning_rate', 0.1)}")
+        if 'state_stats' in rl:
+            state_stats = rl['state_stats']
+            if state_stats:
+                print(f"   - RL state stats (first 3):")
+                for state, info in list(state_stats.items())[:3]:
+                    print(f"     * '{state}': best='{info.get('best_action', 'N/A')}', Q={info.get('best_q_value', 0):.3f}")
+eval_metrics = result.get('evaluation', {})
+print("\n6. Evaluation Metrics:")
+for key, value in eval_metrics.items():
+    if isinstance(value, (int, float)):
+        if 0 <= value <= 1:
+            print(f"   - {key}: {value*100:.1f}%")
+        else:
+            print(f"   - {key}: {value}")
+val_results = result.get('validation_results', {})
+if val_results:
+    total_checks = 0
+    total_passed = 0
+    print("\n7. Validation Results (Deep UVM Compliance):")
+    for file_path, file_result in val_results.items():
+        file_name = os.path.basename(file_path)
+        checks = file_result.get('checks', [])
+        for check in checks:
+            total_checks += 1
+            if check.get('passed'):
+                total_passed += 1
+    if total_checks > 0:
+        pass_rate = (total_passed / total_checks) * 100
+        print(f"   - Total checks: {total_checks}")
+        print(f"   - Passed: {total_passed}")
+        print(f"   - Pass rate: {pass_rate:.1f}%")
+print("\n" + "="*60)
+if result.get('passed'):
+    print("TEST PASSED - V2 ML Model working correctly!")
+else:
+    print("TEST COMPLETED - Review warnings above")
+print("="*60)

tests/test_advanced_ml_v2.py ADDED Viewed

	@@ -0,0 +1,477 @@

+"""
+Test script for Advanced ML V2 Model
+Tests: RL strategies, experience replay, eligibility traces, pattern learning, deep validation
+"""
+import sys
+import os
+import tempfile
+import yaml
+repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, repo_root)
+from src.models.enhanced_ml_model_v2 import EnhancedMLGenerationModelV2
+from src.config import PipelineConfig, MLConfig, AutoTrainConfig, GenerationConfig
+TEST_SPEC = """
+design_name: uart
+clock_reset:
+  clock: clk
+  reset: rst_n
+interfaces:
+  - name: wb
+    signals:
+      - name: wb_cyc
+        direction: input
+      - name: wb_stb
+        direction: input
+      - name: wb_we
+        direction: input
+      - name: wb_addr
+        direction: input
+        width: 3
+      - name: wb_data_o
+        direction: output
+        width: 8
+      - name: wb_data_i
+        direction: input
+        width: 8
+      - name: wb_ack
+        direction: output
+  - name: uart
+    signals:
+      - name: uart_tx
+        direction: output
+      - name: uart_rx
+        direction: input
+      - name: cts_n
+        direction: input
+      - name: rts_n
+        direction: output
+      - name: uart_intr
+        direction: output
+registers:
+  - name: RBR_THR
+    address: 0x0
+    description: Receiver Buffer / Transmitter Holding
+    fields:
+      - name: data
+        bits: 7:0
+  - name: IER
+    address: 0x1
+    description: Interrupt Enable
+    fields:
+      - name: erbfi
+        bits: '0'
+        description: Enable RX data available interrupt
+      - name: etbei
+        bits: '1'
+        description: Enable TX holding register empty interrupt
+  - name: LCR
+    address: 0x3
+    description: Line Control
+    fields:
+      - name: wls
+        bits: 1:0
+        description: Word length select
+      - name: dlab
+        bits: '7'
+        description: Divisor latch access bit
+  - name: LSR
+    address: 0x5
+    description: Line Status
+    fields:
+      - name: dr
+        bits: '0'
+        description: Data Ready
+      - name: thre
+        bits: '5'
+        description: TX Holding Register Empty
+protocol: uart
+"""
+def test_rl_strategies():
+    """Test all RL exploration strategies."""
+    print("\n" + "="*60)
+    print("Testing RL Exploration Strategies")
+    print("="*60)
+    strategies = ["epsilon_greedy", "softmax", "ucb", "thompson"]
+    results = {}
+    for strategy in strategies:
+        print(f"\n--- Testing {strategy} strategy ---")
+        cfg = PipelineConfig(
+            ml=MLConfig(
+                enabled=True,
+                model_type="v2",
+                exploration_strategy=strategy,
+                use_llm=False,
+                use_semantic_encoder=False,
+                use_learning=True,
+                learning_storage_path=None
+            )
+        )
+        model = EnhancedMLGenerationModelV2(cfg)
+        spec_dict = yaml.safe_load(TEST_SPEC)
+        result = model.generate(spec_dict)
+        passed = result['passed']
+        generated_files = result.get('generated_files', {})
+        print(f"  Passed: {passed}")
+        print(f"  Files generated: {len(generated_files)}")
+        print(f"  Source: {result.get('source', 'unknown')}")
+        print(f"  Strategy used: {result.get('strategy', 'unknown')}")
+        if hasattr(model, '_rl_learner'):
+            rl_stats = model._rl_learner.get_performance_stats()
+            print(f"  RL episodes: {rl_stats.get('episode_count', 0)}")
+            print(f"  RL total updates: {rl_stats.get('total_updates', 0)}")
+        results[strategy] = {
+            "passed": passed,
+            "files_count": len(generated_files),
+            "source": result.get('source', 'unknown'),
+            "strategy": result.get('strategy', 'unknown')
+        }
+    print("\n--- Strategy Results Summary ---")
+    for strategy, res in results.items():
+        status = "✅" if res["passed"] else "❌"
+        print(f"  {status} {strategy}: {res['files_count']} files, source={res['source']}, strategy={res['strategy']}")
+    return all(r["passed"] for r in results.values())
+def test_experience_replay():
+    """Test experience replay buffer and eligibility traces."""
+    print("\n" + "="*60)
+    print("Testing Experience Replay & Eligibility Traces")
+    print("="*60)
+    cfg = PipelineConfig(
+        ml=MLConfig(
+            enabled=True,
+            model_type="v2",
+            exploration_strategy="ucb",
+            use_llm=False,
+            use_semantic_encoder=False,
+            use_learning=True,
+            learning_storage_path=None
+        )
+    )
+    model = EnhancedMLGenerationModelV2(cfg)
+    spec_dict = yaml.safe_load(TEST_SPEC)
+    print("  Running multiple generations to populate replay buffer...")
+    for i in range(5):
+        result = model.generate(spec_dict)
+        print(f"    Generation {i+1}: passed={result['passed']}, source={result.get('source', 'unknown')}")
+        reward = 1.0 if result['passed'] else 0.0
+        model.learn(result, reward)
+    if hasattr(model, '_rl_learner'):
+        rl = model._rl_learner
+        print(f"\n  Experience replay buffer size: {len(rl._replay_buffer)}")
+        print(f"  Episode count: {rl.get_performance_stats().get('episode_count', 0)}")
+        if hasattr(rl, '_eligibility_traces') and rl._eligibility_traces:
+            print(f"  Eligibility traces tracked: {len(rl._eligibility_traces)}")
+        state_stats = rl.get_state_stats()
+        print(f"\n  State statistics (first 3):")
+        for state, stats in list(state_stats.items())[:3]:
+            print(f"    '{state}': best_action='{stats.get('best_action', 'N/A')}', Q={stats.get('best_q_value', 0):.3f}, visits={stats.get('visit_count', 0)}")
+        return len(rl._replay_buffer) > 0
+    return False
+def test_pattern_learner():
+    """Test advanced pattern learning."""
+    print("\n" + "="*60)
+    print("Testing Advanced Pattern Learner")
+    print("="*60)
+    cfg = PipelineConfig(
+        ml=MLConfig(
+            enabled=True,
+            model_type="v2",
+            exploration_strategy="ucb",
+            use_llm=False,
+            use_semantic_encoder=False,
+            use_learning=True,
+            learning_storage_path=None
+        )
+    )
+    model = EnhancedMLGenerationModelV2(cfg)
+    spec_dict = yaml.safe_load(TEST_SPEC)
+    print("  Running generations for pattern learning...")
+    for i in range(3):
+        result = model.generate(spec_dict)
+        reward = 1.0 if result['passed'] else 0.0
+        model.learn(result, reward)
+    if hasattr(model, '_pattern_learner'):
+        pl = model._pattern_learner
+        stats = pl.get_statistics()
+        print(f"\n  Pattern Learner Stats:")
+        print(f"    Total specs seen: {stats['total_specs_seen']}")
+        print(f"    Total generations: {stats['total_generations']}")
+        print(f"    Average score: {stats['avg_score']:.3f}")
+        print(f"    N-gram vocabulary size: {len(stats['ngram_vocab'])}")
+        print(f"    Association rules: {len(stats['association_rules'])}")
+        recs = pl.get_recommendations(spec_dict)
+        print(f"\n  Recommendations for current spec:")
+        for rec in recs[:5]:
+            print(f"    • {rec}")
+        common = pl.get_common_error_patterns(top_n=5)
+        if common:
+            print(f"\n  Common error patterns:")
+            for pattern, count in common:
+                print(f"    • '{pattern}': {count} occurrences")
+        return True
+    return False
+def test_deep_validation():
+    """Test deep UVM compliance validation."""
+    print("\n" + "="*60)
+    print("Testing Deep UVM Compliance Validation")
+    print("="*60)
+    cfg = PipelineConfig(
+        ml=MLConfig(
+            enabled=True,
+            model_type="v2",
+            exploration_strategy="ucb",
+            use_llm=False,
+            use_semantic_encoder=False,
+            use_learning=True,
+            strict_validation=True,
+            learning_storage_path=None
+        )
+    )
+    model = EnhancedMLGenerationModelV2(cfg)
+    spec_dict = yaml.safe_load(TEST_SPEC)
+    result = model.generate(spec_dict)
+    print(f"\n  Generated files: {len(result.get('generated_files', {}))}")
+    print(f"  Passed: {result['passed']}")
+    val_results = result.get('validation_results', {})
+    if val_results:
+        print(f"\n  Validation Results:")
+        total_checks = 0
+        total_passed = 0
+        for file_path, file_result in val_results.items():
+            file_name = os.path.basename(file_path)
+            checks = file_result.get('checks', [])
+            if checks:
+                print(f"\n    {file_name}:")
+                for check in checks:
+                    total_checks += 1
+                    status = "✅" if check.get('passed', False) else "❌"
+                    if check.get('passed'):
+                        total_passed += 1
+                    msg = f"      {status} {check.get('check_name', 'unknown')}"
+                    if check.get('message'):
+                        msg += f": {check['message']}"
+                    print(msg)
+        if total_checks > 0:
+            pass_rate = (total_passed / total_checks) * 100
+            print(f"\n  Overall validation pass rate: {pass_rate:.1f}% ({total_passed}/{total_checks})")
+        return total_checks > 0
+    return False
+def test_learning_persistence():
+    """Test saving and loading learning state."""
+    print("\n" + "="*60)
+    print("Testing Learning State Persistence")
+    print("="*60)
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+        state_path = f.name
+    try:
+        cfg = PipelineConfig(
+            ml=MLConfig(
+                enabled=True,
+                model_type="v2",
+                exploration_strategy="ucb",
+                use_llm=False,
+                use_semantic_encoder=False,
+                use_learning=True,
+                learning_storage_path=state_path
+            )
+        )
+        print("  Creating model and running generations...")
+        model = EnhancedMLGenerationModelV2(cfg)
+        spec_dict = yaml.safe_load(TEST_SPEC)
+        for i in range(3):
+            result = model.generate(spec_dict)
+            reward = 1.0 if result['passed'] else 0.0
+            model.learn(result, reward)
+        if hasattr(model, '_rl_learner'):
+            episodes_before = model._rl_learner.get_performance_stats().get('episode_count', 0)
+            replay_size_before = len(model._rl_learner._replay_buffer)
+            print(f"  Episodes before save: {episodes_before}")
+            print(f"  Replay buffer size before save: {replay_size_before}")
+        print("  Saving learning state...")
+        model.save_learning_state(state_path)
+        print("  Loading learning state into new model...")
+        model2 = EnhancedMLGenerationModelV2(cfg)
+        model2.load_learning_state(state_path)
+        if hasattr(model2, '_rl_learner'):
+            episodes_after = model2._rl_learner.get_performance_stats().get('episode_count', 0)
+            replay_size_after = len(model2._rl_learner._replay_buffer)
+            print(f"  Episodes after load: {episodes_after}")
+            print(f"  Replay buffer size after load: {replay_size_after}")
+            return episodes_after >= 3 and replay_size_after >= 3
+        return False
+    finally:
+        if os.path.exists(state_path):
+            os.unlink(state_path)
+def test_learning_stats():
+    """Test ML stats generation for UI."""
+    print("\n" + "="*60)
+    print("Testing Learning Statistics (for UI)")
+    print("="*60)
+    cfg = PipelineConfig(
+        ml=MLConfig(
+            enabled=True,
+            model_type="v2",
+            exploration_strategy="ucb",
+            use_llm=False,
+            use_semantic_encoder=False,
+            use_learning=True,
+            learning_storage_path=None
+        )
+    )
+    model = EnhancedMLGenerationModelV2(cfg)
+    spec_dict = yaml.safe_load(TEST_SPEC)
+    for i in range(3):
+        result = model.generate(spec_dict)
+        reward = 1.0 if result['passed'] else 0.0
+        model.learn(result, reward)
+    if hasattr(model, 'get_learning_stats'):
+        stats = model.get_learning_stats()
+        print(f"\n  Learning Stats:")
+        print(f"    Total generations: {stats.get('total_generations', 0)}")
+        if 'source_distribution' in stats:
+            print(f"\n    Source distribution:")
+            for source, count in stats['source_distribution'].items():
+                print(f"      • {source}: {count}")
+        if 'strategy_weights' in stats:
+            print(f"\n    Strategy weights:")
+            for strategy, weight in stats['strategy_weights'].items():
+                print(f"      • {strategy}: {weight}")
+        if 'rl_learner' in stats:
+            print(f"\n    RL Learner stats:")
+            print(f"      Episode count: {stats['rl_learner'].get('episode_count', 0)}")
+            print(f"      Total updates: {stats['rl_learner'].get('total_updates', 0)}")
+        if 'pattern_learner' in stats:
+            print(f"\n    Pattern Learner stats:")
+            print(f"      Total specs seen: {stats['pattern_learner'].get('total_specs_seen', 0)}")
+        return True
+    return False
+def run_all_tests():
+    """Run all tests and report results."""
+    print("\n" + "="*60)
+    print("Advanced ML V2 Model - Complete Test Suite")
+    print("="*60)
+    tests = [
+        ("RL Exploration Strategies", test_rl_strategies),
+        ("Experience Replay & Eligibility Traces", test_experience_replay),
+        ("Advanced Pattern Learner", test_pattern_learner),
+        ("Deep UVM Validation", test_deep_validation),
+        ("Learning State Persistence", test_learning_persistence),
+        ("Learning Statistics (UI)", test_learning_stats),
+    ]
+    results = []
+    for name, test_func in tests:
+        try:
+            result = test_func()
+            results.append((name, result, None))
+        except Exception as e:
+            results.append((name, False, str(e)))
+    print("\n" + "="*60)
+    print("Test Results Summary")
+    print("="*60)
+    all_passed = True
+    for name, result, error in results:
+        if result:
+            print(f"✅ {name}")
+        else:
+            print(f"❌ {name}")
+            all_passed = False
+            if error:
+                print(f"   Error: {error}")
+    print("\n" + "="*60)
+    if all_passed:
+        print("🎉 All tests PASSED!")
+    else:
+        print("⚠️ Some tests FAILED")
+    print("="*60)
+    return all_passed
+if __name__ == "__main__":
+    success = run_all_tests()
+    sys.exit(0 if success else 1)