Spaces:

NishithP2004
/

neurocaster-env

Paused

App Files Files Community

neurocaster-env / server /verifiers.py

NishithP2004

Upload folder using huggingface_hub

9c75f36 verified about 1 month ago

raw

history blame contribute delete

3.96 kB

	"""Programmatic verifiers and reward breadcrumbs for NeuroCaster."""

	from __future__ import annotations

	import re
	from pathlib import Path
	from typing import Dict, Iterable, List, Tuple


	# Speaker-note HTML comment: "<!--", one whitespace char, at least one
	# character, one whitespace char, "-->". DOTALL lets a note span lines.
	NARRATION_RE = re.compile(r"<!--\s.+?\s-->", re.DOTALL)
	# Fenced ```mermaid block; group 1 captures the diagram source up to the
	# closing fence. The "mermaid" label is matched case-insensitively.
	MERMAID_FENCE_RE = re.compile(r"```mermaid\s+(.*?)```", re.DOTALL | re.IGNORECASE)


	def result(name: str, passed: bool, reward: float = 0.0, message: str = "", fatal: bool = False) -> Dict[str, object]:
	    """Package one verifier outcome as a plain dict.

	    Args:
	        name: Identifier of the verifier that produced the outcome.
	        passed: Whether the check succeeded.
	        reward: Reward contribution attached to this outcome.
	        message: Human-readable explanation.
	        fatal: Flag stored under ``"fatal"`` for the caller to interpret.

	    Returns:
	        A dict with the keys ``name``, ``passed``, ``reward``, ``message``
	        and ``fatal``, in that order.
	    """
	    return dict(
	        name=name,
	        passed=passed,
	        reward=reward,
	        message=message,
	        fatal=fatal,
	    )


	def component(name: str, value: float, reason: str) -> Dict[str, object]:
	    """Build a reward-component record.

	    Returns:
	        A dict with the keys ``name``, ``value`` and ``reason`` holding the
	        corresponding arguments unchanged.
	    """
	    record: Dict[str, object] = dict(name=name, value=value, reason=reason)
	    return record


	def extract_mermaid_fences(markdown: str) -> List[str]:
	    """Collect the source of every fenced mermaid block in ``markdown``.

	    Args:
	        markdown: Slide markdown; a falsy value is treated as empty text.

	    Returns:
	        The captured body of each ``MERMAID_FENCE_RE`` match, in document
	        order, with surrounding whitespace stripped.
	    """
	    source = markdown or ""
	    bodies: List[str] = []
	    for fence in MERMAID_FENCE_RE.finditer(source):
	        # Group 1 is the text between the opening and closing fence.
	        bodies.append(fence.group(1).strip())
	    return bodies


	def verify_narration_tags(markdown: str) -> Dict[str, object]:
	    """Require at least one speaker-note HTML comment in the markdown.

	    Args:
	        markdown: Slide markdown; a falsy value is treated as empty text.

	    Returns:
	        A passing ``narration_tags`` result with zero reward when
	        ``NARRATION_RE`` matches anywhere in the text; otherwise a failing
	        result with reward -0.50 and ``fatal=True``.
	    """
	    has_note = NARRATION_RE.search(markdown or "") is not None
	    if not has_note:
	        return result(
	            "narration_tags",
	            False,
	            -0.50,
	            "Missing required <!-- narration --> speaker-note comments",
	            fatal=True,
	        )
	    return result("narration_tags", True, 0.0, "Speaker-note HTML comments found")


	def verify_audio_anchor(markdown: str, expected_path: str) -> Dict[str, object]:
	    """Check that the markdown mentions the expected audio path verbatim.

	    Args:
	        markdown: Slide markdown; a falsy value is treated as empty text.
	        expected_path: Path string that must appear as a substring. An empty
	            or otherwise falsy value always fails.

	    Returns:
	        A passing ``audio_anchor`` result with zero reward when the path is
	        present; otherwise a failing result with reward -0.50 and
	        ``fatal=True``.
	    """
	    body = markdown or ""
	    anchored = bool(expected_path) and expected_path in body
	    if not anchored:
	        return result(
	            "audio_anchor",
	            False,
	            -0.50,
	            f"Missing or invalid audio anchor; expected {expected_path}",
	            fatal=True,
	        )
	    return result("audio_anchor", True, 0.0, "Static expressive audio anchor referenced")


	def verify_semantic_criteria(markdown: str, hidden_criteria: Iterable[str]) -> Dict[str, object]:
	    """Run a fast keyword check of the markdown against hidden criteria.

	    Each criterion is lower-cased and tokenized into identifiers of two or
	    more characters that start with a letter; a fixed set of filler words is
	    discarded. A criterion counts as matched when any remaining keyword
	    occurs as a substring of the lower-cased markdown. Criteria that yield no
	    keywords are never reported as missed.

	    Args:
	        markdown: Slide markdown; a falsy value is treated as empty text.
	        hidden_criteria: Criterion sentences to look for.

	    Returns:
	        A passing ``semantic_match`` result with reward 0.05 when nothing was
	        missed; otherwise a failing result whose reward is -0.05 per missed
	        criterion and whose message lists the missed criteria.
	    """
	    haystack = (markdown or "").lower()
	    filler_words = {"must", "include", "explain", "the", "and", "with", "one"}

	    unmatched = []
	    for criterion in hidden_criteria:
	        tokens = re.findall(r"[a-zA-Z][a-zA-Z0-9_]+", criterion.lower())
	        keywords = [token for token in tokens if token not in filler_words]
	        if not keywords:
	            # Nothing to search for, so the criterion cannot be missed.
	            continue
	        if any(keyword in haystack for keyword in keywords):
	            continue
	        unmatched.append(criterion)

	    if not unmatched:
	        return result("semantic_match", True, 0.05, "All hidden criteria matched by fast keyword verifier")
	    return result("semantic_match", False, -0.05 * len(unmatched), f"Missed criteria: {unmatched}")


	def _visible_slide_lines(slide: str) -> List[str]:
	    """Return the lines of ``slide`` that are not part of an HTML comment.

	    A line whose stripped text starts with ``<!--`` is hidden in full. If that
	    comment is not closed on the same line, every following line up to and
	    including the one containing ``-->`` is hidden as well.
	    """
	    visible: List[str] = []
	    inside_comment = False
	    for line in slide.splitlines():
	        stripped = line.strip()
	        if inside_comment:
	            # Continuation of a multi-line comment; the closing line is hidden too.
	            inside_comment = "-->" not in stripped
	            continue
	        if stripped.startswith("<!--"):
	            # Look for the terminator only after the opening marker.
	            inside_comment = "-->" not in stripped[4:]
	            continue
	        visible.append(line)
	    return visible


	def verify_slide_structure(markdown: str) -> Dict[str, object]:
	    """Check each slide against fixed density limits.

	    The markdown is split into slides on lines consisting of ``---`` (plus
	    optional trailing whitespace); empty pieces are dropped. For every slide
	    the HTML-comment lines are ignored, including the continuation lines of
	    multi-line comments, so speaker notes do not count toward the limits.
	    Blank lines still count as visible lines.

	    Args:
	        markdown: Slide markdown; a falsy value is treated as empty text.

	    Returns:
	        A failing ``slide_structure`` result with reward -0.10 when no slides
	        are found, or when the first offending slide has more than 18 visible
	        lines or more than 140 words. Otherwise a passing result with zero
	        reward.
	    """
	    slides = [
	        slide.strip()
	        for slide in re.split(r"^---\s*$", markdown or "", flags=re.MULTILINE)
	        if slide.strip()
	    ]
	    if not slides:
	        return result("slide_structure", False, -0.10, "No slides detected")

	    for index, slide in enumerate(slides, start=1):
	        visible_lines = _visible_slide_lines(slide)
	        word_count = len(re.findall(r"\w+", "\n".join(visible_lines)))
	        if len(visible_lines) > 18 or word_count > 140:
	            return result("slide_structure", False, -0.10, f"Slide {index} likely overflows")
	    return result("slide_structure", True, 0.0, "Slide density within deterministic limits")


	def verify_mermaid_embeds(markdown: str, rendered_paths: Iterable[str]) -> Dict[str, object]:
	    """Check whether any rendered Mermaid diagram is referenced in the slides.

	    Args:
	        markdown: Slide markdown; a falsy value is treated as empty text.
	        rendered_paths: Paths of rendered diagrams; falsy entries are ignored.

	    Returns:
	        A failing ``mermaid_embed`` result with zero reward when no usable
	        paths were given or none is referenced. A passing result with reward
	        0.10 when a full path, or just its final path component, occurs as a
	        substring of the markdown.
	    """
	    body = markdown or ""
	    candidates = list(filter(None, rendered_paths))
	    if not candidates:
	        return result("mermaid_embed", False, 0.0, "No rendered Mermaid diagram paths available")

	    for candidate in candidates:
	        # Accept either the path as given or its bare file name.
	        if candidate in body or Path(candidate).name in body:
	            return result("mermaid_embed", True, 0.10, "Rendered Mermaid diagram embedded in slides")
	    return result("mermaid_embed", False, 0.0, "Mermaid rendered but not embedded in slide markdown")


	def summarize_results(results: Iterable[Dict[str, object]]) -> Tuple[float, bool]:
	    """Aggregate verifier results into a total reward and a fatal flag.

	    Args:
	        results: Result dicts to aggregate. Any iterable is accepted,
	            including generators and other one-shot iterators.

	    Returns:
	        ``(reward, fatal)`` where ``reward`` is the float sum of every item's
	        ``"reward"`` (a missing key counts as 0.0) and ``fatal`` is True when
	        at least one item has a truthy ``"fatal"`` and a falsy ``"passed"``.
	    """
	    # Materialize once: the input is traversed twice below, and a one-shot
	    # iterator would be exhausted by the first pass, forcing fatal to False.
	    items = list(results)
	    # Start from 0.0 so an empty input yields a float, as annotated.
	    reward = sum((float(item.get("reward", 0.0)) for item in items), 0.0)
	    fatal = any(bool(item.get("fatal")) and not bool(item.get("passed")) for item in items)
	    return reward, fatal