neurocaster-env / server /verifiers.py
NishithP2004's picture
Upload folder using huggingface_hub
9c75f36 verified
"""Programmatic verifiers and reward breadcrumbs for NeuroCaster."""
from __future__ import annotations
import re
from pathlib import Path
from typing import Dict, Iterable, List, Tuple
# Matches an HTML comment that has at least one character of content.
# re.DOTALL lets "." cross newlines, so the comment body may span several lines.
NARRATION_RE = re.compile(r"<!--\s*.+?\s*-->", re.DOTALL)
# Matches a fenced block opened with ```mermaid (case-insensitive, followed by
# whitespace) and captures, non-greedily, everything up to the next ```.
MERMAID_FENCE_RE = re.compile(r"```mermaid\s+(.*?)```", re.DOTALL | re.IGNORECASE)
def result(name: str, passed: bool, reward: float = 0.0, message: str = "", fatal: bool = False) -> Dict[str, object]:
    """Build the record that describes the outcome of a single verifier.

    Args:
        name: Identifier of the verifier that produced the record.
        passed: Whether the check succeeded.
        reward: Reward delta attached to this outcome.
        message: Human-readable explanation of the outcome.
        fatal: Flag marking the check as fatal.

    Returns:
        A dict with the keys ``name``, ``passed``, ``reward``, ``message``
        and ``fatal`` (in that order) holding the given values unchanged.
    """
    record: Dict[str, object] = dict(
        name=name,
        passed=passed,
        reward=reward,
        message=message,
        fatal=fatal,
    )
    return record
def component(name: str, value: float, reason: str) -> Dict[str, object]:
    """Package a named reward component together with the reason for its value.

    Returns:
        A dict with the keys ``name``, ``value`` and ``reason`` (in that
        order) holding the given arguments unchanged.
    """
    breadcrumb: Dict[str, object] = dict(name=name, value=value, reason=reason)
    return breadcrumb
def extract_mermaid_fences(markdown: str) -> List[str]:
    """Return the body of every Mermaid fenced block found in ``markdown``.

    A falsy ``markdown`` (``None`` or ``""``) is treated as empty text and
    yields an empty list. Each returned body has surrounding whitespace removed.
    """
    source = markdown or ""
    fences: List[str] = []
    for found in MERMAID_FENCE_RE.finditer(source):
        # Group 1 is the text between the opening fence and the closing ```.
        fences.append(found.group(1).strip())
    return fences
def verify_narration_tags(markdown: str) -> Dict[str, object]:
    """Check that ``markdown`` contains at least one HTML comment.

    A falsy ``markdown`` is treated as empty text.

    Returns:
        A passing record with zero reward when a comment is found; otherwise
        a failing record with a -0.50 reward and the fatal flag set.
    """
    has_narration = NARRATION_RE.search(markdown or "") is not None
    if not has_narration:
        return result(
            "narration_tags",
            False,
            -0.50,
            "Missing required <!-- narration --> speaker-note comments",
            fatal=True,
        )
    return result("narration_tags", True, 0.0, "Speaker-note HTML comments found")
def verify_audio_anchor(markdown: str, expected_path: str) -> Dict[str, object]:
    """Check that ``expected_path`` appears verbatim somewhere in ``markdown``.

    An empty ``expected_path`` always fails, and a falsy ``markdown`` is
    treated as empty text.

    Returns:
        A passing record with zero reward when the path is referenced;
        otherwise a failing record with a -0.50 reward and the fatal flag set.
    """
    slide_text = markdown or ""
    anchored = bool(expected_path) and expected_path in slide_text
    if not anchored:
        return result(
            "audio_anchor",
            False,
            -0.50,
            f"Missing or invalid audio anchor; expected {expected_path}",
            fatal=True,
        )
    return result("audio_anchor", True, 0.0, "Static expressive audio anchor referenced")
def verify_semantic_criteria(markdown: str, hidden_criteria: Iterable[str]) -> Dict[str, object]:
    """Run a keyword-based check of ``markdown`` against each hidden criterion.

    Each criterion is lower-cased and tokenised into identifier-like words of
    two or more characters; a small set of filler words is dropped. A
    criterion counts as matched when any remaining keyword occurs as a
    substring of the lower-cased markdown. Criteria that yield no keywords
    are never reported as missed.

    Returns:
        A passing record with a +0.05 reward when nothing is missed;
        otherwise a failing record whose reward is -0.05 per missed criterion
        and whose message lists the missed criteria.
    """
    filler_words = {"must", "include", "explain", "the", "and", "with", "one"}
    haystack = (markdown or "").lower()

    unmatched = []
    for criterion in hidden_criteria:
        tokens = re.findall(r"[a-zA-Z][a-zA-Z0-9_]+", criterion.lower())
        keywords = [word for word in tokens if word not in filler_words]
        if not keywords:
            # Nothing left to look for, so the criterion cannot be missed.
            continue
        if any(word in haystack for word in keywords):
            continue
        unmatched.append(criterion)

    if not unmatched:
        return result("semantic_match", True, 0.05, "All hidden criteria matched by fast keyword verifier")
    return result("semantic_match", False, -0.05 * len(unmatched), f"Missed criteria: {unmatched}")
def _visible_slide_lines(slide: str) -> List[str]:
    """Return the lines of ``slide`` that are not hidden inside an HTML comment."""
    visible: List[str] = []
    inside_comment = False
    for line in slide.splitlines():
        stripped = line.strip()
        if inside_comment:
            # Continuation of a multi-line comment: hidden up to and including
            # the line that carries the closing marker.
            if "-->" in stripped:
                inside_comment = False
            continue
        if stripped.startswith("<!--"):
            # A comment opening at the start of a line hides that line. When
            # it is not closed on the same line it keeps hiding the lines
            # that follow. Search after the opener so "<!-->" is not read as
            # already closed.
            if "-->" not in stripped[len("<!--"):]:
                inside_comment = True
            continue
        visible.append(line)
    return visible


def verify_slide_structure(markdown: str) -> Dict[str, object]:
    """Check that every slide stays within fixed density limits.

    Slides are separated by lines consisting of ``---``; empty segments are
    ignored. For each slide, lines hidden inside an HTML comment that opens
    at the start of a line are excluded. This includes the continuation lines
    of a multi-line comment, so long speaker notes do not count towards the
    limits. A slide fails when it has more than 18 remaining lines or more
    than 140 words on those lines.

    Args:
        markdown: Slide deck source; a falsy value is treated as empty text.

    Returns:
        A failing record with a -0.10 reward when no slide is found or when
        the first offending slide is reported; otherwise a passing record
        with zero reward.
    """
    segments = re.split(r"^---\s*$", markdown or "", flags=re.MULTILINE)
    slides = [segment.strip() for segment in segments if segment.strip()]
    if not slides:
        return result("slide_structure", False, -0.10, "No slides detected")

    for index, slide in enumerate(slides, start=1):
        visible_lines = _visible_slide_lines(slide)
        word_count = len(re.findall(r"\w+", "\n".join(visible_lines)))
        if len(visible_lines) > 18 or word_count > 140:
            return result("slide_structure", False, -0.10, f"Slide {index} likely overflows")
    return result("slide_structure", True, 0.0, "Slide density within deterministic limits")
def verify_mermaid_embeds(markdown: str, rendered_paths: Iterable[str]) -> Dict[str, object]:
    """Check that at least one rendered Mermaid diagram is referenced in ``markdown``.

    Falsy entries in ``rendered_paths`` are ignored. A path counts as embedded
    when either the full path or just its final component appears in the
    markdown text. A falsy ``markdown`` is treated as empty text.

    Returns:
        A passing record with a +0.10 reward when a diagram is embedded;
        otherwise a failing record with zero reward, whose message
        distinguishes "nothing rendered" from "rendered but not embedded".
    """
    candidates = list(filter(None, rendered_paths))
    if not candidates:
        return result("mermaid_embed", False, 0.0, "No rendered Mermaid diagram paths available")

    slide_text = markdown or ""
    for candidate in candidates:
        if candidate in slide_text or Path(candidate).name in slide_text:
            return result("mermaid_embed", True, 0.10, "Rendered Mermaid diagram embedded in slides")
    return result("mermaid_embed", False, 0.0, "Mermaid rendered but not embedded in slide markdown")
def summarize_results(results: Iterable[Dict[str, object]]) -> Tuple[float, bool]:
    """Aggregate verifier records into a total reward and a fatal flag.

    Args:
        results: Verifier records; any iterable is accepted, including
            one-shot iterators and generators.

    Returns:
        A ``(reward, fatal)`` tuple. ``reward`` is the sum of every record's
        ``"reward"`` (a missing key counts as 0.0). ``fatal`` is True when at
        least one record has a truthy ``"fatal"`` and a falsy ``"passed"``.
    """
    # Materialise once: both aggregations below need a full pass, and a
    # generator would already be exhausted by the time `any` runs.
    records = list(results)
    reward = sum((float(record.get("reward", 0.0)) for record in records), 0.0)
    fatal = any(bool(record.get("fatal")) and not bool(record.get("passed")) for record in records)
    return reward, fatal