"""Programmatic verifiers and reward breadcrumbs for NeuroCaster.""" from __future__ import annotations import re from pathlib import Path from typing import Dict, Iterable, List, Tuple NARRATION_RE = re.compile(r"", re.DOTALL) MERMAID_FENCE_RE = re.compile(r"```mermaid\s+(.*?)```", re.DOTALL | re.IGNORECASE) def result(name: str, passed: bool, reward: float = 0.0, message: str = "", fatal: bool = False) -> Dict[str, object]: return { "name": name, "passed": passed, "reward": reward, "message": message, "fatal": fatal, } def component(name: str, value: float, reason: str) -> Dict[str, object]: return {"name": name, "value": value, "reason": reason} def extract_mermaid_fences(markdown: str) -> List[str]: return [match.strip() for match in MERMAID_FENCE_RE.findall(markdown or "")] def verify_narration_tags(markdown: str) -> Dict[str, object]: if NARRATION_RE.search(markdown or ""): return result("narration_tags", True, 0.0, "Speaker-note HTML comments found") return result( "narration_tags", False, -0.50, "Missing required speaker-note comments", fatal=True, ) def verify_audio_anchor(markdown: str, expected_path: str) -> Dict[str, object]: if expected_path and expected_path in (markdown or ""): return result("audio_anchor", True, 0.0, "Static expressive audio anchor referenced") return result( "audio_anchor", False, -0.50, f"Missing or invalid audio anchor; expected {expected_path}", fatal=True, ) def verify_semantic_criteria(markdown: str, hidden_criteria: Iterable[str]) -> Dict[str, object]: text = (markdown or "").lower() missed = [] for criterion in hidden_criteria: keywords = [ token for token in re.findall(r"[a-zA-Z][a-zA-Z0-9_]+", criterion.lower()) if token not in {"must", "include", "explain", "the", "and", "with", "one"} ] if keywords and not any(keyword in text for keyword in keywords): missed.append(criterion) if missed: return result("semantic_match", False, -0.05 * len(missed), f"Missed criteria: {missed}") return result("semantic_match", True, 0.05, "All hidden criteria matched by fast keyword verifier") def verify_slide_structure(markdown: str) -> Dict[str, object]: slides = [slide.strip() for slide in re.split(r"^---\s*$", markdown or "", flags=re.MULTILINE) if slide.strip()] if not slides: return result("slide_structure", False, -0.10, "No slides detected") for index, slide in enumerate(slides, start=1): visible_lines = [line for line in slide.splitlines() if not line.strip().startswith("