Spaces:

build-small-hackathon
/

TinyNarrator

Running

cvpfus

Add MiniCPM reader fallback and image cues

9d22294 20 days ago

69.4 kB

	from __future__ import annotations

	import base64
	import importlib.util
	import json
	import mimetypes
	import os
	import sys
	import time
	import urllib.error
	import urllib.request
	import wave
	from datetime import UTC, datetime
	from pathlib import Path
	from typing import Any
	from uuid import uuid4

	from dotenv import load_dotenv
	from fastapi import Request
	from fastapi.responses import HTMLResponse, JSONResponse
	from fastapi.staticfiles import StaticFiles
	from gradio import Server
	from pydantic import BaseModel

	# Load settings from a local .env file (python-dotenv) before the os.getenv
	# lookups further down read them.
	load_dotenv()

	# All paths are resolved relative to the directory containing this file.
	ROOT = Path(__file__).parent
	STATIC_DIR = ROOT / "static"
	OUTPUT_DIR = ROOT / "outputs"
	# Created at import time so the "/outputs" static mount has a directory to serve.
	OUTPUT_DIR.mkdir(exist_ok=True)


	def _int_env(name: str, default: int) -> int:
	    """Read an integer setting from the environment.

	    Args:
	        name: Environment variable to look up.
	        default: Value used when the variable is unset or not parseable.

	    Returns:
	        The parsed integer, or ``default`` when ``int()`` rejects the value.
	    """
	    try:
	        raw_value = os.getenv(name, str(default))
	        parsed = int(raw_value)
	    except (TypeError, ValueError):
	        parsed = default
	    return parsed


	def _bool_env(name: str, default: bool = False) -> bool:
	    """Read a boolean flag from the environment.

	    The value is true when, after trimming and lower-casing, it is one of
	    ``1``, ``true``, ``yes`` or ``on``. Any other set value is false; an
	    unset variable yields ``default``.
	    """
	    raw_value = os.getenv(name)
	    if raw_value is not None:
	        return raw_value.strip().lower() in {"1", "true", "yes", "on"}
	    return default


	def _runtime_log(message: str) -> None:
	    """Write one tagged diagnostic line to stderr and flush immediately."""
	    # print() joins the tag and the message with a single space.
	    print("[tiny-narrator]", message, file=sys.stderr, flush=True)


	def _http_exception_detail(exc: BaseException) -> str:
	    """Render an exception as a single log-friendly line.

	    HTTP errors include status, reason and up to 400 characters of the
	    response body; URL errors include the reason; anything else is shown as
	    ``ClassName: message``.
	    """
	    # HTTPError is a subclass of URLError, so it must be tested first.
	    if isinstance(exc, urllib.error.HTTPError):
	        try:
	            raw_body = exc.read().decode("utf-8", errors="replace")
	        except Exception:
	            # Best effort only: the body may be unreadable or already consumed.
	            raw_body = ""
	        shown_body = _compact_text(raw_body, 400) if raw_body else ""
	        detail = f"HTTPError status={exc.code} reason={exc.reason} body={shown_body}"
	    elif isinstance(exc, urllib.error.URLError):
	        detail = f"URLError reason={exc.reason}"
	    else:
	        detail = f"{exc.__class__.__name__}: {exc}"
	    return detail


	# Endpoint, credential and timeout settings, read once at import time.
	# Base URLs are stripped of a trailing "/" so paths can be appended with a
	# single slash (for example f"{LLAMA_CPP_BASE_URL}/models").
	LLAMA_CPP_BASE_URL = os.getenv("LLAMA_CPP_BASE_URL", "http://localhost:8080/v1").rstrip("/")
	LLAMA_CPP_MODEL = os.getenv("LLAMA_CPP_MODEL", "narrator-brain")
	LLAMA_CPP_TOKEN = os.getenv("LLAMA_CPP_TOKEN", "")
	LLAMA_CPP_TIMEOUT_SECONDS = _int_env("LLAMA_CPP_TIMEOUT_SECONDS", 90)
	GRADIO_SERVER_NAME = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
	# PORT wins over GRADIO_SERVER_PORT. Both go through _int_env so a malformed
	# value falls back to the next candidate instead of crashing the import.
	GRADIO_SERVER_PORT = _int_env("PORT", _int_env("GRADIO_SERVER_PORT", 7860))
	GRADIO_SHARE = _bool_env("GRADIO_SHARE")
	PUBLIC_BASE_URL = os.getenv("PUBLIC_BASE_URL", f"http://localhost:{GRADIO_SERVER_PORT}").rstrip("/")
	KLEIN_MODAL_ENDPOINT = os.getenv("KLEIN_MODAL_ENDPOINT", "").rstrip("/")
	KLEIN_MODAL_TOKEN = os.getenv("KLEIN_MODAL_TOKEN", "")
	KLEIN_MODAL_HEALTH_TIMEOUT_SECONDS = _int_env("KLEIN_MODAL_HEALTH_TIMEOUT_SECONDS", 30)
	KLEIN_MODAL_TIMEOUT_SECONDS = _int_env("KLEIN_MODAL_TIMEOUT_SECONDS", 120)
	MINICPM_VISION_BASE_URL = os.getenv("MINICPM_VISION_BASE_URL", "").rstrip("/")
	MINICPM_VISION_API_KEY = os.getenv("MINICPM_VISION_API_KEY", "")
	MINICPM_VISION_MODEL = os.getenv("MINICPM_VISION_MODEL", "openbmb/MiniCPM-V-4.6")
	MINICPM_VISION_TIMEOUT_SECONDS = _int_env("MINICPM_VISION_TIMEOUT_SECONDS", 45)
	# Parameter ceiling, in billions, that model_budget_core() checks each model against.
	TINY_TITAN_LIMIT_B = 4.0

	# Catalogue of the models used by the app, keyed by role. Each entry carries
	# the model id, a display size ("params"), the numeric size in billions
	# ("params_billion", compared against TINY_TITAN_LIMIT_B by
	# model_budget_core()), the runtime, and a one-line role description.
	MODEL_MANIFEST: dict[str, dict[str, Any]] = {
	"reader_brain": {
	"id": "nvidia/NVIDIA-Nemotron-3-Nano-4B-GGUF",
	"params": "3.97B",
	"params_billion": 3.97,
	"runtime": "llama.cpp",
	"role": "Plans concise narration and reading-order phrasing.",
	},
	"vision": {
	"id": "openbmb/MiniCPM-V-4.6",
	"params": "1B",
	"params_billion": 1.0,
	"runtime": "OpenAI-compatible chat completions",
	"role": "Describes images and OCR-like visual details.",
	},
	"speech": {
	"id": "hexgrad/Kokoro-82M",
	"params": "82M",
	"params_billion": 0.082,
	"runtime": "Python",
	"role": "Speaks the final narration.",
	},
	"image_generation": {
	"id": "black-forest-labs/FLUX.2-klein-4B",
	"params": "4B",
	"params_billion": 4.0,
	"runtime": "Modal-hosted Klein",
	"role": "Creates article illustrations.",
	},
	}

	# Bundled article illustrations: display asset, a separate PNG URL under
	# "vision_asset_url", caption, generation prompt, seed and generation
	# receipt fields. Exposed to clients through ARTICLE_MANIFEST["images"].
	ARTICLE_IMAGES: list[dict[str, Any]] = [
	{
	"id": "desk-reader",
	"asset_url": "/static/generated/desk-reader.svg",
	"vision_asset_url": "/static/generated/desk-reader.png",
	"caption": "The article view doubles as the demo surface, so every feature has a real reading task.",
	"prompt": "Accessibility article reader with highlighted paragraph and narration controls.",
	"seed": 11,
	"generation_model": "black-forest-labs/FLUX.2-klein-4B",
	"generation_runtime": "bundled fallback asset",
	"generation_status": "fallback-ready",
	},
	{
	"id": "model-map",
	"asset_url": "/static/generated/model-map.svg",
	"vision_asset_url": "/static/generated/model-map.png",
	"caption": "Each model stays at or below four billion parameters for Tiny Titan eligibility.",
	"prompt": "Diagram of four small AI models working together in an accessibility reader.",
	"seed": 22,
	"generation_model": "black-forest-labs/FLUX.2-klein-4B",
	"generation_runtime": "bundled fallback asset",
	"generation_status": "fallback-ready",
	},
	]

	# Reader defaults plus the selectable voices and the speed slider range.
	# default_voice and default_speed are also reused for the /api/speak sample
	# body built in demo_script_core().
	READER_SETTINGS: dict[str, Any] = {
	"default_voice": "af_heart",
	"default_speed": 1.0,
	"default_auto_advance": False,
	"voices": [
	{"id": "af_heart", "label": "Heart"},
	{"id": "af_bella", "label": "Bella"},
	{"id": "am_adam", "label": "Adam"},
	{"id": "am_michael", "label": "Michael"},
	],
	"speed": {"min": 0.75, "max": 1.35, "step": 0.05},
	}

	# One evidence entry per targeted award. submission_readiness_core() passes
	# its "award_targets" check only while this list has exactly four entries.
	AWARD_EVIDENCE: list[dict[str, str]] = [
	{
	"id": "tiny-titan",
	"label": "Tiny Titan",
	"status": "ready",
	"evidence": "All planned model roles use models at or below 4B parameters.",
	},
	{
	"id": "llama-champion",
	"label": "Llama Champion",
	"status": "ready",
	"evidence": "The reader-brain path targets a GGUF model through a llama.cpp OpenAI-compatible endpoint.",
	},
	{
	"id": "off-brand",
	"label": "Off-Brand",
	"status": "ready",
	"evidence": "The visible app is custom HTML, CSS, and JavaScript served by Gradio Server.",
	},
	{
	"id": "field-notes",
	"label": "Field Notes",
	"status": "ready",
	"evidence": "The repo records model choices, fallbacks, latency reporting, and accessibility behavior.",
	},
	]


	def model_budget_core() -> dict[str, Any]:
	    """Report every manifest model's size against the Tiny Titan ceiling.

	    Returns:
	        A dict with the award name, the limit in billions, one entry per
	        model role (including a ``within_limit`` flag) and an overall
	        ``all_models_within_limit`` verdict.
	    """

	    def budget_entry(role: str, spec: dict[str, Any]) -> dict[str, Any]:
	        size_billion = float(spec["params_billion"])
	        return {
	            "role": role,
	            "id": spec["id"],
	            "runtime": spec["runtime"],
	            "params": spec["params"],
	            "params_billion": size_billion,
	            "limit_billion": TINY_TITAN_LIMIT_B,
	            # The limit is inclusive: a model exactly at the ceiling passes.
	            "within_limit": size_billion <= TINY_TITAN_LIMIT_B,
	        }

	    entries = [budget_entry(role, spec) for role, spec in MODEL_MANIFEST.items()]
	    every_model_fits = all(entry["within_limit"] for entry in entries)
	    return {
	        "ok": True,
	        "award": "Tiny Titan",
	        "limit_billion": TINY_TITAN_LIMIT_B,
	        "all_models_within_limit": every_model_fits,
	        "models": entries,
	    }


	def runtime_setup_core() -> dict[str, Any]:
	    """Describe how to launch the app and each model runtime.

	    Returns:
	        A dict with an ``app`` section (launch command and Gradio settings)
	        and one ``steps`` entry per model role: its model id, runtime, launch
	        command, relevant environment values and the fallback used when the
	        runtime is unavailable. Secret values are never included, only
	        whether they are configured.
	    """

	    def secret_state(secret: str) -> str:
	        # Expose presence only, never the secret itself.
	        return "(configured)" if secret else "(not set)"

	    app_section = {
	        "runtime": "Gradio Server",
	        "command": "python app.py",
	        "env": {
	            "GRADIO_SERVER_NAME": GRADIO_SERVER_NAME,
	            "GRADIO_SERVER_PORT": str(GRADIO_SERVER_PORT),
	            "GRADIO_SHARE": str(GRADIO_SHARE).lower(),
	            "PUBLIC_BASE_URL": PUBLIC_BASE_URL,
	        },
	    }

	    llama_command = " ".join(
	        [
	            "llama-server -hf nvidia/NVIDIA-Nemotron-3-Nano-4B-GGUF:Q4_K_M",
	            "--alias narrator-brain --port 8080 --host 0.0.0.0",
	            "--ctx-size 4096 --parallel 1 --reasoning off --n-gpu-layers 999",
	        ]
	    )
	    reader_brain_step = {
	        "role": "reader_brain",
	        "label": "Reader brain",
	        "model": MODEL_MANIFEST["reader_brain"]["id"],
	        "runtime": "llama.cpp",
	        "command": llama_command,
	        "modal_command": "modal deploy modal_workers/reader_brain.py",
	        "env": {
	            "LLAMA_CPP_BASE_URL": LLAMA_CPP_BASE_URL,
	            "LLAMA_CPP_MODEL": LLAMA_CPP_MODEL,
	            "LLAMA_CPP_TOKEN": secret_state(LLAMA_CPP_TOKEN),
	            "LLAMA_CPP_TIMEOUT_SECONDS": str(LLAMA_CPP_TIMEOUT_SECONDS),
	        },
	        "fallback": "MiniCPM-V-4.6 text fallback, then rule-based local narration",
	    }

	    speech_step = {
	        "role": "speech",
	        "label": "Speech",
	        "model": MODEL_MANIFEST["speech"]["id"],
	        "runtime": "Python Kokoro",
	        "command": "python -m pip install kokoro soundfile",
	        "env": {},
	        "fallback": "browser speech plus visible transcript",
	    }

	    vision_step = {
	        "role": "vision",
	        "label": "Image descriptions",
	        "model": MODEL_MANIFEST["vision"]["id"],
	        "runtime": "OpenAI-compatible chat completions",
	        "command": "vllm serve openbmb/MiniCPM-V-4.6 --host 0.0.0.0 --port 8000",
	        "env": {
	            "MINICPM_VISION_BASE_URL": MINICPM_VISION_BASE_URL or "(set to OpenAI-compatible base URL)",
	            "MINICPM_VISION_API_KEY": secret_state(MINICPM_VISION_API_KEY),
	            "MINICPM_VISION_MODEL": MINICPM_VISION_MODEL,
	            "MINICPM_VISION_TIMEOUT_SECONDS": str(MINICPM_VISION_TIMEOUT_SECONDS),
	        },
	        "fallback": "cached deterministic alt text",
	    }

	    image_step = {
	        "role": "image_generation",
	        "label": "Article images",
	        "model": MODEL_MANIFEST["image_generation"]["id"],
	        "runtime": "Modal-hosted Klein",
	        "command": "modal deploy modal_workers/klein_image.py",
	        "env": {
	            "KLEIN_MODAL_ENDPOINT": KLEIN_MODAL_ENDPOINT or "(set to Modal worker URL)",
	            "KLEIN_MODAL_TOKEN": secret_state(KLEIN_MODAL_TOKEN),
	            "KLEIN_MODAL_HEALTH_TIMEOUT_SECONDS": str(KLEIN_MODAL_HEALTH_TIMEOUT_SECONDS),
	            "KLEIN_MODAL_TIMEOUT_SECONDS": str(KLEIN_MODAL_TIMEOUT_SECONDS),
	        },
	        "fallback": "bundled generated article assets",
	    }

	    return {
	        "ok": True,
	        "app": app_section,
	        "steps": [reader_brain_step, speech_step, vision_step, image_step],
	    }


	def demo_curl_command(check: dict[str, Any]) -> str:
	    """Build a POSIX-shell ``curl`` command for one demo API check.

	    Args:
	        check: A check entry with ``method`` and ``path``; POST checks must
	            also provide ``sample_body``.

	    Returns:
	        ``curl <url>`` for GET checks, otherwise a POST command sending the
	        compact JSON body with a JSON content type.
	    """
	    import shlex

	    # shlex.quote leaves shell-safe text untouched and single-quotes the
	    # rest, so a body containing an apostrophe still yields a valid command.
	    quoted_url = shlex.quote(f"{PUBLIC_BASE_URL}{check['path']}")
	    if check["method"] == "GET":
	        return f"curl {quoted_url}"
	    quoted_body = shlex.quote(json.dumps(check["sample_body"], separators=(",", ":")))
	    return f'curl -X POST {quoted_url} -H "Content-Type: application/json" -d {quoted_body}'


	def demo_powershell_command(check: dict[str, Any]) -> str:
	    """Build a ``curl.exe`` command line for one demo API check.

	    GET checks produce ``curl.exe <url>``. Any other method produces a POST
	    command whose compact JSON body is wrapped in double quotes, with each
	    inner double quote escaped as ``\\"``.
	    """
	    target = f"{PUBLIC_BASE_URL}{check['path']}"
	    if check["method"] != "GET":
	        compact_json = json.dumps(check["sample_body"], separators=(",", ":"))
	        escaped_json = compact_json.replace('"', '\\"')
	        return f'curl.exe -X POST {target} -H "Content-Type: application/json" -d "{escaped_json}"'
	    return f"curl.exe {target}"


	def demo_script_core() -> dict[str, Any]:
	    """Assemble the judge demo runbook.

	    Returns:
	        A dict with the demo title, goal, estimated duration, the manual
	        walkthrough ``actions`` and the ``api_checks`` list. Every API check
	        carries ready-to-run ``curl`` and ``powershell`` commands.
	    """
	    get_expectations = [
	        ("/api/health", "custom Gradio Server app and model manifest"),
	        ("/api/model-budget", "all_models_within_limit is true"),
	        ("/api/runtime-setup", "llama.cpp, Kokoro, vision, and image paths"),
	        ("/api/runtime-status", "online or fallback-ready runtime labels"),
	        ("/api/accessibility-audit", "all reader-mode checks pass"),
	        ("/api/image-descriptions", "two article image descriptions with MiniCPM-V-4.6 or fallback alt text"),
	        ("/api/submission-readiness", "all submission readiness checks pass"),
	    ]
	    post_checks = [
	        {
	            "method": "POST",
	            "path": "/api/reader-brain",
	            "expect": "concise narration or fallback narration",
	            "sample_body": {
	                "node_type": "heading",
	                "text": "The model map",
	                "position": "item 4 of 10",
	                "mode": "narrate",
	            },
	        },
	        {
	            "method": "POST",
	            "path": "/api/speak",
	            "expect": "Kokoro audio or audible browser fallback path",
	            "sample_body": {
	                "text": "Heading. The model map.",
	                "voice": READER_SETTINGS["default_voice"],
	                "speed": READER_SETTINGS["default_speed"],
	            },
	        },
	        {
	            "method": "POST",
	            "path": "/api/generate-article",
	            "expect": "generated article draft with Klein thumbnail receipt",
	            "sample_body": {"topic": "deep sea bioluminescence"},
	        },
	    ]
	    bare_checks = [
	        {"method": "GET", "path": path, "expect": expectation}
	        for path, expectation in get_expectations
	    ] + post_checks
	    # Commands are appended after the descriptive keys so the key order of
	    # each check stays method, path, expect, [sample_body], curl, powershell.
	    api_checks = [
	        {
	            **entry,
	            "curl": demo_curl_command(entry),
	            "powershell": demo_powershell_command(entry),
	        }
	        for entry in bare_checks
	    ]

	    walkthrough = [
	        (
	            "Open the article",
	            "Load the home page and scan the article plus session panel.",
	            "Custom HTML/CSS/JS interface served by Gradio Server.",
	        ),
	        (
	            "Turn on reader mode",
	            "Toggle screen reader mode, then press Space or Next to narrate the first item.",
	            "The active article node is focused, outlined, narrated, and logged.",
	        ),
	        (
	            "Navigate by meaning",
	            "Use Heading, Image, and Summary controls to jump through semantic article nodes.",
	            "The reader uses article structure, image alt text, and section summaries.",
	        ),
	        (
	            "Inspect small-model receipts",
	            "Review the checklist, model budget, runtime plan, readiness, latency, and transcript panels.",
	            "Tiny Titan, Llama Champion, Off-Brand, and reader evidence is exposed through the app and APIs.",
	        ),
	    ]
	    actions = [
	        {"step": number, "label": label, "action": action, "evidence": evidence}
	        for number, (label, action, evidence) in enumerate(walkthrough, start=1)
	    ]

	    return {
	        "ok": True,
	        "title": "Tiny Narrator judge demo",
	        "goal": "Show a custom article app becoming a small-model screen reader with inspectable evidence.",
	        "estimated_minutes": 3,
	        "actions": actions,
	        "api_checks": api_checks,
	    }


	def submission_readiness_core(
	    *,
	    runtime_status: dict[str, Any] | None = None,
	    image_descriptions: dict[str, Any] | None = None,
	) -> dict[str, Any]:
	    """Run the submission readiness checklist.

	    Args:
	        runtime_status: Optional precomputed result of
	            ``_runtime_status_core()``. When omitted it is computed here,
	            which performs live health checks.
	        image_descriptions: Optional precomputed result of
	            ``describe_article_images_core()``. When omitted it is computed
	            here.

	    Returns:
	        A dict with the individual ``checks`` (each ``pass`` or ``fail`` with
	        an evidence sentence), the pass count, the total, and ``all_passed``.
	    """
	    model_budget = model_budget_core()
	    runtime_setup = runtime_setup_core()
	    if runtime_status is None:
	        runtime_status = _runtime_status_core()
	    accessibility = accessibility_audit_core()
	    demo_script = demo_script_core()
	    if image_descriptions is None:
	        image_descriptions = describe_article_images_core()

	    def verdict(passed: bool) -> str:
	        return "pass" if passed else "fail"

	    api_checks = demo_script["api_checks"]
	    runtime_roles = {step["role"] for step in runtime_setup["steps"]}
	    expected_roles = set(MODEL_MANIFEST)
	    api_paths = {item["path"] for item in api_checks}
	    post_samples_ready = all(
	        item.get("sample_body") for item in api_checks if item["method"] == "POST"
	    )
	    commands_use_public_base = all(
	        PUBLIC_BASE_URL in item["curl"] and PUBLIC_BASE_URL in item["powershell"]
	        for item in api_checks
	    )
	    runtime_statuses = {
	        runtime_status[role]["status"]
	        for role in ("reader_brain", "vision", "speech", "image_generation")
	    }
	    runtime_status_ready = runtime_statuses <= {"online", "fallback-ready", "placeholder-ready"}
	    frontend_present = all(
	        (STATIC_DIR / name).exists() for name in ["index.html", "app.css", "app.js"]
	    )
	    image_model_id = MODEL_MANIFEST["image_generation"]["id"]
	    image_receipts_ready = all(
	        item.get("generation_model") == image_model_id and isinstance(item.get("seed"), int)
	        for item in image_descriptions["descriptions"]
	    )
	    required_paths = {
	        "/api/model-budget",
	        "/api/runtime-setup",
	        "/api/accessibility-audit",
	        "/api/image-descriptions",
	        "/api/reader-brain",
	        "/api/speak",
	        "/api/generate-article",
	    }
	    demo_checks_ready = required_paths.issubset(api_paths) and bool(post_samples_ready)

	    checks = [
	        {
	            "id": "tiny_titan_budget",
	            "label": "Tiny Titan model budget",
	            "status": verdict(model_budget["all_models_within_limit"]),
	            "evidence": f"{len(model_budget['models'])} model roles are at or below {TINY_TITAN_LIMIT_B}B parameters.",
	        },
	        {
	            "id": "award_targets",
	            "label": "Targeted award evidence",
	            "status": verdict(len(AWARD_EVIDENCE) == 4),
	            "evidence": "Tiny Titan, Llama Champion, Off-Brand, and Field Notes evidence is exposed.",
	        },
	        {
	            "id": "custom_frontend",
	            "label": "Custom frontend",
	            "status": verdict(frontend_present),
	            "evidence": "The app serves custom HTML, CSS, and JavaScript through Gradio Server.",
	        },
	        {
	            "id": "runtime_setup",
	            "label": "Runtime setup",
	            "status": verdict(runtime_roles == expected_roles),
	            "evidence": "Runtime setup covers reader brain, speech, vision, and image generation paths.",
	        },
	        {
	            "id": "runtime_status",
	            "label": "Runtime status",
	            "status": verdict(runtime_status_ready),
	            "evidence": "Runtime status labels every model path as online, fallback-ready, or placeholder-ready.",
	        },
	        {
	            "id": "reader_accessibility",
	            "label": "Reader accessibility",
	            "status": verdict(accessibility["all_passed"]),
	            "evidence": f"{accessibility['passed_checks']} of {accessibility['total_checks']} accessibility checks pass.",
	        },
	        {
	            "id": "image_receipts",
	            "label": "Image receipts",
	            "status": verdict(image_receipts_ready),
	            "evidence": "Every article illustration exposes prompt, seed, model, asset URL, and fallback status.",
	        },
	        {
	            "id": "demo_api_checks",
	            "label": "Demo API checks",
	            "status": verdict(demo_checks_ready),
	            "evidence": "The judge runbook includes GET evidence checks and executable POST sample bodies.",
	        },
	        {
	            "id": "command_base_url",
	            "label": "Command base URL",
	            "status": verdict(commands_use_public_base),
	            "evidence": f"Generated curl and curl.exe commands use {PUBLIC_BASE_URL}.",
	        },
	    ]
	    passed_count = sum(1 for check in checks if check["status"] == "pass")
	    return {
	        "ok": True,
	        "all_passed": passed_count == len(checks),
	        "passed_checks": passed_count,
	        "total_checks": len(checks),
	        "checks": checks,
	    }


	def evidence_bundle_core() -> dict[str, Any]:
	    """Collect every evidence report into a single timestamped bundle.

	    The runtime status and image descriptions are computed once and shared
	    with the readiness report, so the readiness verdict reflects the same
	    data shown in the bundle and the health checks are not repeated.
	    """
	    runtime_status = _runtime_status_core()
	    image_descriptions = describe_article_images_core()
	    return {
	        "ok": True,
	        "title": "Tiny Narrator judge evidence bundle",
	        "schema_version": "1.0",
	        # UTC timestamp rendered with a trailing "Z" instead of "+00:00".
	        "generated_at": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
	        "frontend": "custom Gradio Server HTML/CSS/JS",
	        "public_base_url": PUBLIC_BASE_URL,
	        "bonus_targets": ARTICLE_MANIFEST["bonus_targets"],
	        "models": MODEL_MANIFEST,
	        "award_evidence": {"ok": True, "items": AWARD_EVIDENCE},
	        "model_budget": model_budget_core(),
	        "runtime_setup": runtime_setup_core(),
	        "runtime_status": runtime_status,
	        "demo_script": demo_script_core(),
	        "accessibility_audit": accessibility_audit_core(),
	        "image_descriptions": image_descriptions,
	        "submission_readiness": submission_readiness_core(
	            runtime_status=runtime_status,
	            image_descriptions=image_descriptions,
	        ),
	    }


	def accessibility_audit_core() -> dict[str, Any]:
	    """Return the reader-mode accessibility checklist.

	    Every check is declared here with a fixed ``pass`` status and an evidence
	    sentence; the summary fields are derived from that list.
	    """
	    declared = [
	        (
	            "semantic_queue",
	            "Semantic reading queue",
	            "Readable article nodes declare data-reader-type values and the reader controls navigate the ordered semantic node list.",
	        ),
	        (
	            "keyboard_navigation",
	            "Keyboard navigation",
	            "Reader controls expose Space, N, P, H, I, S, R, and Esc shortcuts through the manifest, visible buttons, and aria-keyshortcuts.",
	        ),
	        (
	            "reader_cursor",
	            "Reader cursor state",
	            "The active readable node receives focus, a visible outline, a stable reader id, and aria-current state.",
	        ),
	        (
	            "shortcut_safety",
	            "Shortcut safety",
	            "Global reader shortcuts ignore form controls so voice, speed, and auto-advance settings remain usable.",
	        ),
	        (
	            "live_region",
	            "Live narration region",
	            "The current narration is mirrored into an aria-live polite region.",
	        ),
	        (
	            "image_alt_text",
	            "Image descriptions",
	            "Article images start with meaningful fallback alt text, then model descriptions are written into real img alt attributes.",
	        ),
	        (
	            "inspectable_transcript",
	            "Inspectable transcript",
	            "Narration entries are stored in a visible transcript with reader position, runtime, latency, copy, and clear controls.",
	        ),
	        (
	            "user_control",
	            "User-controlled playback",
	            "Auto-advance is off by default, Esc stops audio, and navigation interrupts active speech.",
	        ),
	        (
	            "fallback_resilience",
	            "Fallback resilience",
	            "Reader brain, speech, vision, and image generation paths report deterministic fallbacks.",
	        ),
	    ]
	    checks = [
	        {"id": check_id, "label": label, "status": "pass", "evidence": evidence}
	        for check_id, label, evidence in declared
	    ]
	    passing = [check for check in checks if check["status"] == "pass"]
	    return {
	        "ok": True,
	        "all_passed": len(passing) == len(checks),
	        "total_checks": len(checks),
	        "passed_checks": len(passing),
	        "checks": checks,
	    }


	# Article manifest assembled once at import time. The *_core() calls below
	# run when the module loads, so the embedded reports are snapshots taken at
	# startup. This must stay below the functions and constants it references.
	ARTICLE_MANIFEST: dict[str, Any] = {
	"title": "A tiny model reader that turns articles into guided narration",
	# Keyboard shortcuts: display key, the matching aria-keyshortcuts value, and the action.
	"reader_controls": [
	{"key": "Space", "aria_keyshortcuts": "Space", "action": "Play or pause"},
	{"key": "N", "aria_keyshortcuts": "N", "action": "Next item"},
	{"key": "P", "aria_keyshortcuts": "P", "action": "Previous item"},
	{"key": "H", "aria_keyshortcuts": "H", "action": "Next heading"},
	{"key": "I", "aria_keyshortcuts": "I", "action": "Next image"},
	{"key": "S", "aria_keyshortcuts": "S", "action": "Summarize current section"},
	{"key": "R", "aria_keyshortcuts": "R", "action": "Repeat current item"},
	{"key": "Esc", "aria_keyshortcuts": "Escape", "action": "Stop audio"},
	],
	# Also read by evidence_bundle_core() for its "bonus_targets" field.
	"bonus_targets": ["Tiny Titan", "Llama Champion", "Off-Brand", "Field Notes"],
	"images": ARTICLE_IMAGES,
	"models": MODEL_MANIFEST,
	"model_budget": model_budget_core(),
	"runtime_setup": runtime_setup_core(),
	"demo_script": demo_script_core(),
	"accessibility_audit": accessibility_audit_core(),
	"reader_settings": READER_SETTINGS,
	"award_evidence": AWARD_EVIDENCE,
	}

	# Gradio Server application object. The two mounts serve the files under
	# STATIC_DIR at "/static" and the files under OUTPUT_DIR at "/outputs".
	app = Server(title="Tiny Narrator")
	app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
	app.mount("/outputs", StaticFiles(directory=OUTPUT_DIR), name="outputs")


	# Narration request for one article node. The fields match the
	# /api/reader-brain sample body in demo_script_core(); presumably this is
	# the request model for that route (the handler is outside this view).
	class ReaderBrainRequest(BaseModel):
	    node_type: str = "paragraph"
	    text: str
	    # Free-form location hint; the demo sample uses "item 4 of 10".
	    position: str | None = None
	    # The prompt builder treats "summarize" specially; other values narrate.
	    mode: str = "narrate"


	# Request to describe one article image. Only image_id is required; the
	# remaining fields are optional context.
	class ImageDescriptionRequest(BaseModel):
	    image_id: str
	    caption: str | None = None
	    prompt: str | None = None
	    image_url: str | None = None


	# Speech synthesis request. The fields match the /api/speak sample body in
	# demo_script_core(); the defaults mirror READER_SETTINGS' default voice and speed.
	class SpeechRequest(BaseModel):
	text: str
	voice: str = "af_heart"
	speed: float = 1.0


	# Image generation request: a text prompt plus an optional integer seed.
	class ImageGenerationRequest(BaseModel):
	    prompt: str
	    seed: int | None = None


	# Article generation request. The single field matches the
	# /api/generate-article sample body in demo_script_core().
	class ArticleGenerationRequest(BaseModel):
	topic: str


	def _json(data: dict[str, Any], status_code: int = 200) -> JSONResponse:
	    """Wrap ``data`` in a JSON response with the given HTTP status code."""
	    response = JSONResponse(data, status_code=status_code)
	    return response


	def _elapsed_ms(start: float) -> int:
	    """Return whole milliseconds elapsed since ``start``.

	    ``start`` must be a reading taken earlier from ``time.perf_counter()``.
	    """
	    elapsed_seconds = time.perf_counter() - start
	    return round(elapsed_seconds * 1000)


	def _validate_modal_klein_health(payload: dict[str, Any]) -> None:
	    """Check a Klein health payload.

	    Raises:
	        ValueError: If ``ok`` is not exactly ``True``, the reported model is
	            not the manifest's image generation model, or the runtime is not
	            ``"modal-klein"``. Only the first failing condition is reported.
	    """
	    problem = None
	    if payload.get("ok") is not True:
	        problem = "health check did not return ok"
	    elif payload.get("model") != MODEL_MANIFEST["image_generation"]["id"]:
	        problem = "health check returned unexpected model"
	    elif payload.get("runtime") != "modal-klein":
	        problem = "health check returned unexpected runtime"
	    if problem is not None:
	        raise ValueError(problem)


	def _modal_klein_base_url() -> str:
	    """Return the configured Klein endpoint without a route suffix.

	    Trailing slashes are dropped, then a single trailing ``/generate`` or
	    ``/health`` (checked in that order) is removed so callers can append
	    their own route.
	    """
	    base = KLEIN_MODAL_ENDPOINT.rstrip("/")
	    matching = [route for route in ("/generate", "/health") if base.endswith(route)]
	    if matching:
	        # Only the first matching suffix is stripped.
	        return base[: -len(matching[0])]
	    return base


	def _llama_cpp_headers(extra: dict[str, str] | None = None) -> dict[str, str]:
	    """Build request headers for the llama.cpp endpoint.

	    Args:
	        extra: Optional headers to start from; the mapping is copied, not
	            mutated.

	    Returns:
	        A new dict containing ``extra`` plus a bearer ``Authorization``
	        header when ``LLAMA_CPP_TOKEN`` is set. The token header overrides
	        any ``Authorization`` entry supplied in ``extra``.
	    """
	    headers = dict(extra or {})
	    if LLAMA_CPP_TOKEN:
	        headers["Authorization"] = f"Bearer {LLAMA_CPP_TOKEN}"
	    return headers


	def _runtime_status_core() -> dict[str, Any]:
	    """Probe every model runtime and report its availability.

	    The llama.cpp reader brain and the Modal Klein worker are checked with
	    live HTTP requests; Kokoro speech is checked by looking for the
	    ``kokoro`` and ``soundfile`` packages; vision status comes from
	    ``_vision_runtime_status()``. A failed or malformed health response
	    degrades that runtime to ``fallback-ready`` instead of raising.

	    Returns:
	        A dict with one status entry per role (``reader_brain``, ``vision``,
	        ``speech``, ``image_generation``) and the total ``elapsed_ms``.
	    """
	    start = time.perf_counter()

	    # --- llama.cpp reader brain -------------------------------------------
	    llama_start = time.perf_counter()
	    llama_status: dict[str, Any]
	    if not LLAMA_CPP_BASE_URL:
	        _runtime_log("llama.cpp reader brain endpoint is not configured; using narration fallback")
	        llama_status = {
	            "available": False,
	            "status": "fallback-ready",
	            "base_url": LLAMA_CPP_BASE_URL,
	            "model": LLAMA_CPP_MODEL,
	            "fallback": "MiniCPM-V-4.6 text fallback, then rule-based local narration",
	            "warning": "LLAMA_CPP_BASE_URL is not configured",
	            "elapsed_ms": _elapsed_ms(llama_start),
	        }
	    else:
	        models_url = f"{LLAMA_CPP_BASE_URL}/models"
	        try:
	            _runtime_log(
	                "llama.cpp reader brain health check "
	                f"url={models_url} token={'configured' if LLAMA_CPP_TOKEN else 'not-set'} "
	                "timeout=1.5s"
	            )
	            request = urllib.request.Request(
	                models_url,
	                headers=_llama_cpp_headers(),
	                method="GET",
	            )
	            with urllib.request.urlopen(request, timeout=1.5) as response:
	                payload = json.loads(response.read().decode("utf-8"))
	            # Valid JSON of the wrong shape is routed to the fallback branch
	            # below instead of escaping as AttributeError/TypeError.
	            if not isinstance(payload, dict):
	                raise ValueError("models response is not a JSON object")
	            listed_models = payload.get("data")
	            if not isinstance(listed_models, list):
	                listed_models = []
	            model_ids = [item.get("id", "") for item in listed_models if isinstance(item, dict)]
	            _runtime_log(
	                "llama.cpp reader brain health response "
	                f"models={model_ids[:6]} expected={LLAMA_CPP_MODEL}"
	            )
	            llama_status = {
	                "available": True,
	                "status": "online",
	                "base_url": LLAMA_CPP_BASE_URL,
	                "model": LLAMA_CPP_MODEL,
	                "models": model_ids,
	                "elapsed_ms": _elapsed_ms(llama_start),
	            }
	        except (OSError, urllib.error.URLError, TimeoutError, ValueError, json.JSONDecodeError) as exc:
	            detail = _http_exception_detail(exc)
	            _runtime_log(f"llama.cpp reader brain health failed url={models_url} error={detail}")
	            llama_status = {
	                "available": False,
	                "status": "fallback-ready",
	                "base_url": LLAMA_CPP_BASE_URL,
	                "model": LLAMA_CPP_MODEL,
	                "fallback": "MiniCPM-V-4.6 text fallback, then rule-based local narration",
	                "health_url": models_url,
	                "warning": detail,
	                "elapsed_ms": _elapsed_ms(llama_start),
	            }

	    # --- Kokoro speech (package presence only) -----------------------------
	    kokoro_available = importlib.util.find_spec("kokoro") is not None
	    soundfile_available = importlib.util.find_spec("soundfile") is not None

	    # --- Modal-hosted Klein image generation -------------------------------
	    # Timed separately so the reported latency excludes the llama.cpp check.
	    klein_start = time.perf_counter()
	    klein_status: dict[str, Any]
	    if KLEIN_MODAL_ENDPOINT:
	        endpoint = _modal_klein_base_url()
	        health_url = f"{endpoint}/health"
	        try:
	            health_headers: dict[str, str] = {}
	            if KLEIN_MODAL_TOKEN:
	                health_headers["Authorization"] = f"Bearer {KLEIN_MODAL_TOKEN}"
	            _runtime_log(
	                "Modal Klein health check "
	                f"url={health_url} token={'configured' if KLEIN_MODAL_TOKEN else 'not-set'} "
	                f"timeout={KLEIN_MODAL_HEALTH_TIMEOUT_SECONDS}s"
	            )
	            request = urllib.request.Request(
	                health_url,
	                method="GET",
	                headers=health_headers,
	            )
	            with urllib.request.urlopen(request, timeout=KLEIN_MODAL_HEALTH_TIMEOUT_SECONDS) as response:
	                payload = json.loads(response.read().decode("utf-8"))
	            if not isinstance(payload, dict):
	                raise ValueError("health check returned a non-object payload")
	            _runtime_log(
	                "Modal Klein health response "
	                f"model={payload.get('model')} runtime={payload.get('runtime')} ok={payload.get('ok')}"
	            )
	            _validate_modal_klein_health(payload)
	            klein_status = {
	                "available": True,
	                "status": "online",
	                "model": MODEL_MANIFEST["image_generation"]["id"],
	                "endpoint": endpoint,
	                "elapsed_ms": _elapsed_ms(klein_start),
	            }
	        except (OSError, urllib.error.URLError, TimeoutError, ValueError, json.JSONDecodeError) as exc:
	            detail = _http_exception_detail(exc)
	            _runtime_log(f"Modal Klein health failed url={health_url} error={detail}")
	            klein_status = {
	                "available": False,
	                "status": "fallback-ready",
	                "model": MODEL_MANIFEST["image_generation"]["id"],
	                "endpoint": endpoint,
	                "health_url": health_url,
	                "fallback": "bundled generated article assets",
	                "warning": detail,
	                "elapsed_ms": _elapsed_ms(klein_start),
	            }
	    else:
	        _runtime_log("Modal Klein endpoint is not configured; using image fallback")
	        klein_status = {
	            "available": False,
	            "status": "fallback-ready",
	            "model": MODEL_MANIFEST["image_generation"]["id"],
	            "fallback": "bundled generated article assets",
	        }

	    vision_status = _vision_runtime_status()

	    speech_ready = kokoro_available and soundfile_available
	    return {
	        "ok": True,
	        "reader_brain": llama_status,
	        "vision": vision_status,
	        "speech": {
	            "available": speech_ready,
	            "status": "online" if speech_ready else "fallback-ready",
	            "model": "hexgrad/Kokoro-82M",
	            "kokoro_installed": kokoro_available,
	            "soundfile_installed": soundfile_available,
	            "fallback": "browser speech plus transcript",
	        },
	        "image_generation": klein_status,
	        # Total wall time for all probes above.
	        "elapsed_ms": _elapsed_ms(start),
	    }


	def _compact_text(text: str, limit: int = 220) -> str:
	    """Collapse whitespace and cap the text at ``limit`` characters.

	    Args:
	        text: Input text; every run of whitespace becomes one space and
	            leading/trailing whitespace is dropped.
	        limit: Maximum length of the result.

	    Returns:
	        The normalized text if it fits. Otherwise the first ``limit - 1``
	        characters with trailing spaces removed, followed by a period. A
	        non-positive ``limit`` yields an empty string.
	    """
	    normalized = " ".join(text.split())
	    if len(normalized) <= limit:
	        return normalized
	    if limit <= 0:
	        # Without this guard the slice below would use a negative end index
	        # and return text longer than the requested limit.
	        return ""
	    return f"{normalized[: limit - 1].rstrip()}."


	def _topic_seed(topic: str) -> int:
	    """Derive a deterministic seed in the range 1..997 from the topic text."""
	    code_point_total = sum(map(ord, topic))
	    return code_point_total % 997 + 1


	def _fallback_article(topic: str) -> dict[str, Any]:
	    """Build a template article about ``topic`` without calling any model.

	    Args:
	        topic: Requested topic. It is whitespace-normalized, capped at 80
	            characters and stripped of trailing periods; an empty result is
	            replaced by "accessible technology".

	    Returns:
	        A dict with ``title``, ``dek`` and five ``sections``, each holding a
	        ``heading`` and a ``body``.
	    """
	    clean_topic = _compact_text(topic, 80).rstrip(".") or "accessible technology"
	    # Upper-case only the first character. str.capitalize() would also
	    # lower-case the rest and mangle acronyms and proper nouns.
	    sentence_topic = clean_topic[:1].upper() + clean_topic[1:]
	    title = f"A tiny guide to {clean_topic}"
	    return {
	        "title": title,
	        "dek": (
	            f"This generated article introduces {clean_topic} with a short, screen-reader-friendly structure "
	            "and practical examples."
	        ),
	        "sections": [
	            {
	                "heading": f"Why {clean_topic} matters",
	                "body": (
	                    f"{sentence_topic} is easier to understand when the page explains the main idea first, "
	                    "then moves through examples in a predictable order. A longer article gives the reader enough "
	                    "context to pause, repeat, and compare sections without feeling rushed."
	                ),
	            },
	            {
	                "heading": "How a tiny narrator helps",
	                "body": (
	                    "A small reader model can turn each heading, paragraph, and image caption into concise narration "
	                    "without turning the article into a chatbot. It keeps the experience close to the article, so "
	                    "the user stays oriented while moving through the page."
	                ),
	            },
	            {
	                "heading": "What image descriptions add",
	                "body": (
	                    "Images often carry examples, diagrams, or emotional context. A visual descriptor gives those "
	                    "details a spoken form, helping readers understand why an illustration belongs with the surrounding text."
	                ),
	            },
	            {
	                "heading": "Designing for repeated listening",
	                "body": (
	                    "Reader controls should make it easy to move backward, skip to the next heading, replay a sentence, "
	                    "or stop speech completely. Those small controls matter when someone is studying, teaching, or checking details."
	                ),
	            },
	            {
	                "heading": "What to try next",
	                "body": (
	                    "Use the reader controls to move by heading or image, then compare the transcript with the article "
	                    "to check whether the generated structure is easy to follow. If a section sounds confusing, rewrite it "
	                    "with simpler language and a clearer heading."
	                ),
	            },
	        ],
	    }


	def _article_generation_prompt(topic: str) -> str:
	    """Compose the article-writing prompt for ``topic``.

	    The prompt asks for strict JSON with ``title``, ``dek`` and exactly five
	    ``sections`` of plain, screen-reader-friendly prose.
	    """
	    header = f"Write a complete accessible article for Tiny Narrator.\nTopic: {topic}\n\n"
	    requirements = (
	        "Return strict JSON with keys title, dek, and sections. "
	        "sections must contain exactly five objects with heading and body. "
	        "Each body should be 45 to 70 words, with concrete examples and practical detail. "
	        "Use clear plain language suitable for screen-reader narration. Do not include markdown."
	    )
	    return header + requirements


	def _fallback_narration(node_type: str, text: str, mode: str) -> str:
	    """Produce rule-based narration when no model output is available.

	    In ``summarize`` mode the narration always starts with "Summary. ".
	    Otherwise headings, images, buttons and quotes get a spoken prefix and
	    every other node type gets none. Empty text is replaced by a fixed notice.
	    """
	    if mode == "summarize":
	        spoken_prefix = "Summary. "
	    else:
	        prefixes_by_type = {
	            "heading": "Heading. ",
	            "image": "Image description. ",
	            "button": "Control. ",
	            "quote": "Quote. ",
	        }
	        spoken_prefix = prefixes_by_type.get(node_type, "")
	    spoken_body = _compact_text(text) or "No readable text is available for this item."
	    return (spoken_prefix + spoken_body).strip()


	def _image_narration(text: str) -> str:
	    """Make sure image narration opens with a spoken image cue.

	    Text that already starts (case-insensitively) with one of the recognized
	    cues is returned after whitespace compaction; anything else is prefixed
	    with "Image description. ". Empty text becomes a fixed notice.
	    """
	    recognized_cues = ("image description.", "image.", "graphic.", "photo.", "illustration.")
	    spoken_text = _compact_text(text) or "No image description is available."
	    already_cued = spoken_text.lower().startswith(recognized_cues)
	    return spoken_text if already_cued else f"Image description. {spoken_text}"


	def _chat_message_text(payload: dict[str, Any]) -> str:
	    """Extract the assistant text from a chat-completions style payload.

	    Only the first choice is inspected. Its ``message`` is searched, in
	    order, under ``content``, ``reasoning_content``, ``reasoning`` and
	    ``text``; a value may be a string or a list of strings and
	    ``{"text"|"content": ...}`` parts, which are joined with spaces. If the
	    message yields nothing, the choice's own ``text`` field is used.

	    Returns:
	        The stripped text, or an empty string when nothing usable is found.
	    """

	    def flatten(value: Any) -> str:
	        # Strings are used as-is; lists contribute their textual parts.
	        if isinstance(value, str):
	            return value.strip()
	        if not isinstance(value, list):
	            return ""
	        pieces: list[str] = []
	        for part in value:
	            if isinstance(part, str):
	                pieces.append(part)
	            elif isinstance(part, dict):
	                fragment = part.get("text") or part.get("content")
	                if isinstance(fragment, str):
	                    pieces.append(fragment)
	        return " ".join(pieces).strip()

	    choices = payload.get("choices", [])
	    first_choice = choices[0] if choices else None
	    if not isinstance(first_choice, dict):
	        return ""

	    message = first_choice.get("message")
	    if isinstance(message, dict):
	        for field in ("content", "reasoning_content", "reasoning", "text"):
	            found = flatten(message.get(field))
	            if found:
	                return found

	    completion_text = first_choice.get("text")
	    if isinstance(completion_text, str):
	        return completion_text.strip()
	    return ""


	def _chat_response_debug(payload: dict[str, Any]) -> str:
	    """Summarize a chat payload's shape for logs without its content.

	    Lists up to eight top-level keys and, when the first choice is a dict,
	    up to eight of its keys, its message keys and its ``finish_reason``.
	    """
	    top_keys = list(payload)[:8]
	    choices = payload.get("choices", [])
	    first_choice = choices[0] if choices else None
	    if not isinstance(first_choice, dict):
	        return f"top_keys={top_keys} choices=missing"

	    message = first_choice.get("message")
	    message_keys = list(message)[:8] if isinstance(message, dict) else []
	    fields = [
	        f"top_keys={top_keys}",
	        f"choice_keys={list(first_choice)[:8]}",
	        f"message_keys={message_keys}",
	        f"finish_reason={first_choice.get('finish_reason')}",
	    ]
	    return " ".join(fields)


	def _reader_brain_prompt(node_type: str, text: str, position: str \| None, mode: str) -> str:
	if mode == "summarize":
	return (
	"Summarize this article section for screen-reader navigation.\n"
	f"Node type: {node_type}\n"
	f"Position: {position or 'unknown'}\n"
	f"Content: {text}\n\n"
	"Rules: start with 'Summary.', use one or two short sentences, explain what the "
	"reader can learn in this section, and never mention implementation details. "
	"Return only the final spoken narration. Do not explain your reasoning."
	)
	image_rule = (
	"If node type is image, start exactly with 'Image description.' and then describe the visible content. "
	if node_type == "image"
	else ""
	)
	return (
	"Convert this article node into concise screen-reader narration.\n"
	f"Mode: {mode}\n"
	f"Node type: {node_type}\n"
	f"Position: {position or 'unknown'}\n"
	f"Content: {text}\n\n"
	f"Rules: {image_rule}announce the node type only when it helps orientation, keep prose short, "
	"and never mention implementation details. Return only the final spoken narration. "
	"Do not explain your reasoning."
	)


	def _reader_brain_messages(prompt: str) -> list[dict[str, str]]:
	    """Pair the fixed system instruction with the user prompt.

	    Returns:
	        Two chat messages: the system message that tells the model to emit
	        only the text to speak, followed by the user message holding ``prompt``.
	    """
	    system_instruction = (
	        "You are Tiny Narrator's accessibility layer. "
	        "Reasoning mode: off. Do not generate a reasoning trace. "
	        "Do not think step by step. "
	        "Produce clear screen-reader narration for article content. "
	        "Return only the exact text to speak. Do not include analysis, alternatives, "
	        "markdown, labels, or explanations."
	    )
	    system_message = {"role": "system", "content": system_instruction}
	    user_message = {"role": "user", "content": prompt}
	    return [system_message, user_message]


	def _reader_brain_body(model: str, prompt: str) -> bytes:
	    """Serialize a non-streaming chat-completions request as UTF-8 JSON bytes."""
	    request_fields = {
	        "model": model,
	        "messages": _reader_brain_messages(prompt),
	        "temperature": 0.2,
	        "top_p": 0.9,
	        "stream": False,
	        "max_tokens": 80,
	    }
	    serialized = json.dumps(request_fields)
	    return serialized.encode("utf-8")


	def _polish_reader_narration(node_type: str, mode: str, narration: str) -> str:
	    """Compact model narration to 220 characters and apply the image treatment.

	    Image nodes are passed through ``_image_narration`` unless the request is a
	    summary; everything else is returned as compacted text.
	    """
	    compacted = _compact_text(narration, 220)
	    wants_image_treatment = node_type == "image" and mode != "summarize"
	    if not wants_image_treatment:
	        return compacted
	    return _image_narration(compacted)


	def _rule_reader_fallback(
	    node_type: str,
	    text: str,
	    mode: str,
	    start: float,
	    warning: str,
	) -> dict[str, Any]:
	    """Build the reader-brain response produced by the local rule-based narrator.

	    ``warning`` records why no model answered; ``start`` is the
	    ``time.perf_counter()`` reading used for ``elapsed_ms``.
	    """
	    response: dict[str, Any] = {
	        "ok": True,
	        "runtime": "fallback",
	        "model": "rule-based local fallback",
	        "warning": warning,
	    }
	    response["narration"] = _fallback_narration(node_type, text, mode)
	    response["elapsed_ms"] = _elapsed_ms(start)
	    return response


	def _minicpm_reader_brain_fallback(
	    prompt: str,
	    node_type: str,
	    text: str,
	    mode: str,
	    start: float,
	    primary_warning: str,
	) -> dict[str, Any] | None:
	    """Ask the MiniCPM endpoint for narration after the primary model failed.

	    Returns ``None`` when MiniCPM is not configured, so the caller can pick its
	    own fallback. When the request is attempted but fails (or yields empty
	    narration) the rule-based response is returned with both warnings combined.
	    """
	    if not (MINICPM_VISION_BASE_URL and MINICPM_VISION_API_KEY):
	        return None
	    try:
	        endpoint = _openai_compatible_url(MINICPM_VISION_BASE_URL, "chat/completions")
	        request = urllib.request.Request(
	            endpoint,
	            data=_reader_brain_body(MINICPM_VISION_MODEL, prompt),
	            headers={
	                "Authorization": f"Bearer {MINICPM_VISION_API_KEY}",
	                "Content-Type": "application/json",
	            },
	            method="POST",
	        )
	        with urllib.request.urlopen(request, timeout=MINICPM_VISION_TIMEOUT_SECONDS) as response:
	            raw_body = response.read()
	        payload = json.loads(raw_body.decode("utf-8"))
	        narration = _polish_reader_narration(node_type, mode, _chat_message_text(payload))
	        if not narration:
	            raise ValueError("MiniCPM reader fallback returned empty narration")
	        result: dict[str, Any] = {
	            "ok": True,
	            "runtime": "minicpm-v4.6-fallback",
	            "model": MINICPM_VISION_MODEL,
	            "warning": primary_warning,
	            "narration": narration,
	            "elapsed_ms": _elapsed_ms(start),
	        }
	        return result
	    except (OSError, urllib.error.URLError, TimeoutError, ValueError, KeyError, json.JSONDecodeError) as exc:
	        detail = _http_exception_detail(exc)
	        _runtime_log(f"MiniCPM reader-brain fallback failed error={detail}")
	        combined_warning = f"{primary_warning}; MiniCPM reader fallback unavailable: {detail}"
	        return _rule_reader_fallback(node_type, text, mode, start, combined_warning)


	def reader_brain_core(node_type: str, text: str, position: str | None, mode: str) -> dict[str, Any]:
	    """Produce screen-reader narration for one article node.

	    The llama.cpp endpoint is tried first. If it is not configured, fails, or
	    returns empty narration, the MiniCPM fallback is tried, and finally the
	    rule-based narrator. The response always has ``ok: True``; degraded paths
	    carry a ``warning``.
	    """
	    start = time.perf_counter()
	    prompt = _reader_brain_prompt(node_type, text, position, mode)

	    def degrade(warning: str) -> dict[str, Any]:
	        # MiniCPM returns None when it is not configured; only then go rule-based here.
	        secondary = _minicpm_reader_brain_fallback(prompt, node_type, text, mode, start, warning)
	        if secondary:
	            return secondary
	        return _rule_reader_fallback(node_type, text, mode, start, warning)

	    if not LLAMA_CPP_BASE_URL:
	        return degrade("llama.cpp unavailable: LLAMA_CPP_BASE_URL is not configured")

	    try:
	        request = urllib.request.Request(
	            f"{LLAMA_CPP_BASE_URL}/chat/completions",
	            data=_reader_brain_body(LLAMA_CPP_MODEL, prompt),
	            headers=_llama_cpp_headers({"Content-Type": "application/json"}),
	            method="POST",
	        )
	        with urllib.request.urlopen(request, timeout=LLAMA_CPP_TIMEOUT_SECONDS) as response:
	            payload = json.loads(response.read().decode("utf-8"))
	        narration = _polish_reader_narration(node_type, mode, _chat_message_text(payload))
	        if narration:
	            return {
	                "ok": True,
	                "runtime": "llama.cpp",
	                "model": LLAMA_CPP_MODEL,
	                "narration": narration,
	                "elapsed_ms": _elapsed_ms(start),
	            }
	        # The server answered but with nothing speakable: log the response shape.
	        detail = _chat_response_debug(payload)
	        _runtime_log(f"llama.cpp returned empty narration; response_shape={detail}")
	        return degrade(f"llama.cpp returned empty narration ({detail})")
	    except (OSError, urllib.error.URLError, TimeoutError, ValueError, KeyError, json.JSONDecodeError) as exc:
	        return degrade(f"llama.cpp unavailable: {_http_exception_detail(exc)}")


	def _openai_compatible_url(base_url: str, path: str) -> str:
	normalized = base_url.rstrip("/")
	if normalized.endswith("/v1/chat/completions"):
	root = normalized[: -len("/chat/completions")]
	elif normalized.endswith("/v1"):
	root = normalized
	else:
	root = f"{normalized}/v1"
	return f"{root}/{path.lstrip('/')}"


	def _article_image_by_id(image_id: str) -> dict[str, Any] | None:
	    """Return the first ARTICLE_IMAGES entry whose ``id`` equals ``image_id``, or None."""
	    for candidate in ARTICLE_IMAGES:
	        if candidate["id"] == image_id:
	            return candidate
	    return None


	def _local_image_data_url(image_path: str) -> str \| None:
	if image_path.startswith(("http://", "https://", "data:")):
	return None
	normalized = image_path.lstrip("/")
	local_path = (ROOT / normalized).resolve()
	try:
	local_path.relative_to(ROOT.resolve())
	except ValueError:
	return None
	mime_type, _ = mimetypes.guess_type(local_path.name)
	if mime_type not in {"image/png", "image/jpeg", "image/webp"}:
	return None
	if not local_path.exists():
	return None
	encoded = base64.b64encode(local_path.read_bytes()).decode("ascii")
	return f"data:{mime_type};base64,{encoded}"


	def _absolute_image_url(image_url: str \| None, image_id: str \| None = None) -> str \| None:
	candidate = image_url
	if not candidate and image_id:
	image = _article_image_by_id(image_id)
	candidate = (image.get("vision_asset_url") or image.get("asset_url")) if image else None
	if not candidate:
	return None
	if candidate.startswith(("http://", "https://", "data:")):
	return candidate
	data_url = _local_image_data_url(candidate)
	if data_url:
	return data_url
	if candidate.startswith("/"):
	return f"{PUBLIC_BASE_URL}{candidate}"
	return f"{PUBLIC_BASE_URL}/{candidate}"


	def _vision_fallback_text(image_id: str, caption: str \| None, prompt: str \| None) -> str:
	descriptions = {
	"desk-reader": (
	"A person reads a long article on a laptop while an accessibility toolbar "
	"highlights the current paragraph."
	),
	"model-map": (
	"A compact diagram showing four small AI models working together: vision, "
	"reader brain, speech, and image generation."
	),
	}
	return descriptions.get(
	image_id,
	caption or prompt or "A generated article image awaiting model description.",
	)


	def _clean_alt_text(text: str, limit: int = 260) -> str:
	    """Collapse all whitespace runs to single spaces, then compact to ``limit``."""
	    words = text.split()
	    single_spaced = " ".join(words)
	    return _compact_text(single_spaced, limit)


	def _minicpm_vision_prompt(caption: str \| None, prompt: str \| None) -> str:
	context = " ".join(part for part in [caption, prompt] if part)
	context_line = f"\nContext: {context}" if context else ""
	return (
	"Describe this image for a screen reader in one or two concise sentences. "
	"Mention visible content, layout, important text, and purpose when relevant. "
	"Do not guess hidden intent. Do not mention models, implementation details, or that you are an AI. "
	"Return plain text only."
	f"{context_line}"
	)


	def _call_minicpm_vision(image_url: str, caption: str | None, prompt: str | None) -> dict[str, Any]:
	    """Request an image description from the MiniCPM chat-completions endpoint.

	    Network, JSON and response-shape errors are not handled here and propagate
	    to the caller. Raises ``ValueError`` when the reply has no text content.
	    """
	    start = time.perf_counter()
	    user_message = {
	        "role": "user",
	        "content": [
	            {"type": "text", "text": _minicpm_vision_prompt(caption, prompt)},
	            {"type": "image_url", "image_url": {"url": image_url}},
	        ],
	    }
	    request_fields = {
	        "model": MINICPM_VISION_MODEL,
	        "messages": [user_message],
	        "temperature": 0.1,
	        "max_tokens": 160,
	    }
	    request = urllib.request.Request(
	        _openai_compatible_url(MINICPM_VISION_BASE_URL, "chat/completions"),
	        data=json.dumps(request_fields).encode("utf-8"),
	        headers={
	            "Authorization": f"Bearer {MINICPM_VISION_API_KEY}",
	            "Content-Type": "application/json",
	        },
	        method="POST",
	    )
	    with urllib.request.urlopen(request, timeout=MINICPM_VISION_TIMEOUT_SECONDS) as response:
	        payload = json.loads(response.read().decode("utf-8"))
	    content = payload["choices"][0]["message"]["content"]
	    has_text = isinstance(content, str) and bool(content.strip())
	    if not has_text:
	        raise ValueError("MiniCPM vision response did not include text content")
	    return {
	        "runtime": "minicpm-v4.6",
	        "model": MINICPM_VISION_MODEL,
	        "alt_text": _clean_alt_text(content),
	        "elapsed_ms": _elapsed_ms(start),
	    }


	def _check_minicpm_chat_ready() -> dict[str, Any]:
	    """Send a tiny chat completion to MiniCPM and return the parsed response.

	    The request timeout is capped at 10 seconds. Raises ``ValueError`` when the
	    reply carries no text; network, JSON and shape errors propagate unchanged.
	    """
	    probe = {
	        "model": MINICPM_VISION_MODEL,
	        "messages": [{"role": "user", "content": "Reply with ready."}],
	        "temperature": 0,
	        "max_tokens": 8,
	    }
	    headers = {
	        "Authorization": f"Bearer {MINICPM_VISION_API_KEY}",
	        "Content-Type": "application/json",
	    }
	    request = urllib.request.Request(
	        _openai_compatible_url(MINICPM_VISION_BASE_URL, "chat/completions"),
	        data=json.dumps(probe).encode("utf-8"),
	        headers=headers,
	        method="POST",
	    )
	    probe_timeout = min(MINICPM_VISION_TIMEOUT_SECONDS, 10)
	    with urllib.request.urlopen(request, timeout=probe_timeout) as response:
	        payload = json.loads(response.read().decode("utf-8"))
	    content = payload["choices"][0]["message"]["content"]
	    if isinstance(content, str) and content.strip():
	        return payload
	    raise ValueError("MiniCPM readiness response did not include text content")


	def _vision_runtime_status() -> dict[str, Any]:
	    """Report whether the MiniCPM vision endpoint is usable right now.

	    Returns a ``fallback-ready`` status when the endpoint is not configured or
	    the chat-completions probe fails, and ``online`` when the probe succeeds.
	    The ``/models`` listing is informational only: a failure there is reported
	    in ``warning`` but does not make the runtime unavailable.
	    """
	    if not MINICPM_VISION_BASE_URL or not MINICPM_VISION_API_KEY:
	        return {
	            "available": False,
	            "status": "fallback-ready",
	            "model": MINICPM_VISION_MODEL,
	            "configured": False,
	            "fallback": "cached deterministic alt text",
	        }
	    start = time.perf_counter()
	    model_ids: list[str] = []
	    warning: str | None = None

	    # Step 1: best-effort model listing. Kept separate from the chat probe so a
	    # probe failure is never reported as a /models failure.
	    try:
	        request = urllib.request.Request(
	            _openai_compatible_url(MINICPM_VISION_BASE_URL, "models"),
	            headers={"Authorization": f"Bearer {MINICPM_VISION_API_KEY}"},
	            method="GET",
	        )
	        with urllib.request.urlopen(request, timeout=5) as response:
	            payload = json.loads(response.read().decode("utf-8"))
	        model_ids = [
	            item.get("id", "")
	            for item in payload.get("data", [])
	            if isinstance(item, dict)
	        ]
	        if model_ids and MINICPM_VISION_MODEL not in model_ids:
	            warning = "MiniCPM vision model was not listed by /models"
	    except (
	        OSError,
	        urllib.error.URLError,
	        TimeoutError,
	        AttributeError,
	        TypeError,
	        ValueError,
	        json.JSONDecodeError,
	    ) as exc:
	        model_ids = []
	        warning = f"/models unavailable, chat completions ready: {exc.__class__.__name__}"

	    # Step 2: a single chat probe decides availability. Response-shape errors
	    # (KeyError/IndexError/TypeError) are treated like any other failure.
	    try:
	        _check_minicpm_chat_ready()
	    except (
	        OSError,
	        urllib.error.URLError,
	        TimeoutError,
	        KeyError,
	        IndexError,
	        TypeError,
	        ValueError,
	        json.JSONDecodeError,
	    ) as exc:
	        return {
	            "available": False,
	            "status": "fallback-ready",
	            "model": MINICPM_VISION_MODEL,
	            "configured": True,
	            "base_url": MINICPM_VISION_BASE_URL,
	            "fallback": "cached deterministic alt text",
	            "warning": exc.__class__.__name__,
	            "elapsed_ms": _elapsed_ms(start),
	        }
	    return {
	        "available": True,
	        "status": "online",
	        "model": MINICPM_VISION_MODEL,
	        "configured": True,
	        "base_url": MINICPM_VISION_BASE_URL,
	        "models": model_ids,
	        "warning": warning,
	        "elapsed_ms": _elapsed_ms(start),
	    }


	def describe_image_core(
	    image_id: str,
	    caption: str | None,
	    prompt: str | None,
	    image_url: str | None = None,
	) -> dict[str, Any]:
	    """Describe one image, preferring MiniCPM and falling back to canned text.

	    The model is called only when MiniCPM is configured and the image resolves
	    to a URL. Any failure of that call is logged and reported in ``warning``
	    on the fallback response; the result always has ``ok: True``.
	    """
	    start = time.perf_counter()
	    resolved_image_url = _absolute_image_url(image_url, image_id)
	    fallback_text = _vision_fallback_text(image_id, caption, prompt)
	    failure_note = None
	    can_call_model = bool(MINICPM_VISION_BASE_URL and MINICPM_VISION_API_KEY and resolved_image_url)
	    if can_call_model:
	        try:
	            return {"ok": True, **_call_minicpm_vision(resolved_image_url, caption, prompt)}
	        except (
	            OSError,
	            urllib.error.URLError,
	            TimeoutError,
	            KeyError,
	            IndexError,
	            TypeError,
	            ValueError,
	            json.JSONDecodeError,
	        ) as exc:
	            detail = _http_exception_detail(exc)
	            _runtime_log(f"MiniCPM vision failed image_url={resolved_image_url} error={detail}")
	            failure_note = f"MiniCPM vision unavailable: {detail}"
	    fallback_response: dict[str, Any] = {
	        "ok": True,
	        "runtime": "fallback",
	        "model": MINICPM_VISION_MODEL,
	        "alt_text": fallback_text,
	        "warning": failure_note,
	        "elapsed_ms": _elapsed_ms(start),
	    }
	    return fallback_response


	def describe_article_images_core() -> dict[str, Any]:
	    """Describe every image in ARTICLE_IMAGES and return the merged records.

	    Each entry in ``descriptions`` is the image's own fields overlaid with the
	    result of ``describe_image_core``. The top-level ``runtime`` is the shared
	    runtime when all images used the same one, otherwise ``"mixed"``.
	    """
	    start = time.perf_counter()
	    descriptions = []
	    for image in ARTICLE_IMAGES:
	        description = describe_image_core(
	            image["id"],
	            caption=image.get("caption"),
	            prompt=image.get("prompt"),
	            image_url=image.get("vision_asset_url") or image.get("asset_url"),
	        )
	        # Merge the two mappings. A set literal of two dicts would raise
	        # TypeError (dicts are unhashable) and could not be indexed by
	        # "runtime" below.
	        descriptions.append({**image, **description})
	    runtimes = {item["runtime"] for item in descriptions}
	    return {
	        "ok": True,
	        "runtime": runtimes.pop() if len(runtimes) == 1 else "mixed",
	        "model": MINICPM_VISION_MODEL,
	        "descriptions": descriptions,
	        "elapsed_ms": _elapsed_ms(start),
	    }


	def _silent_wav(path: Path, seconds: float = 0.35, sample_rate: int = 24000) -> None:
	frames = int(seconds * sample_rate)
	with wave.open(str(path), "wb") as wav:
	wav.setnchannels(1)
	wav.setsampwidth(2)
	wav.setframerate(sample_rate)
	wav.writeframes(b"\x00\x00" * frames)


	def _prune_speech_outputs(keep_path: Path, max_files: int = 24) -> None:
	    """Delete older ``speech*.wav`` files in OUTPUT_DIR beyond the newest ``max_files``.

	    ``keep_path`` is never deleted, and files that cannot be removed are skipped.
	    """
	    newest_first = list(OUTPUT_DIR.glob("speech*.wav"))
	    newest_first.sort(key=lambda candidate: candidate.stat().st_mtime, reverse=True)
	    protected = keep_path.resolve()
	    for stale in newest_first[max_files:]:
	        if stale.resolve() != protected:
	            try:
	                stale.unlink()
	            except OSError:
	                # Best effort: a file that cannot be removed is left in place.
	                pass


	def speak_core(text: str, voice: str, speed: float) -> dict[str, Any]:
	    """Synthesize ``text`` to a WAV file under OUTPUT_DIR and describe the result.

	    The transcript is compacted to 1200 characters. An empty transcript, or any
	    failure to load or run Kokoro, produces a short silent WAV with
	    ``runtime: "fallback"`` and a ``warning``. The response always has
	    ``ok: True`` and an ``audio_url`` under ``/outputs/``.
	    """
	    start = time.perf_counter()
	    clean_text = _compact_text(text, 1200)
	    if not clean_text:
	        output_path = OUTPUT_DIR / f"speech-fallback-{uuid4().hex}.wav"
	        _silent_wav(output_path)
	        _prune_speech_outputs(output_path)
	        return {
	            "ok": True,
	            "runtime": "fallback",
	            "model": "hexgrad/Kokoro-82M",
	            "warning": "Kokoro skipped because transcript was empty",
	            "audio_url": f"/outputs/{output_path.name}",
	            "transcript": "",
	            "elapsed_ms": _elapsed_ms(start),
	        }
	    try:
	        # Imported lazily so a missing speech stack degrades to the silent fallback.
	        import numpy as np
	        import soundfile as sf
	        from kokoro import KPipeline

	        pipeline = KPipeline(lang_code="a")
	        # The pipeline yields one (graphemes, phonemes, audio) tuple per text
	        # segment. Collect all of them so the audio covers the whole transcript
	        # rather than only the first segment.
	        segments = [
	            np.asarray(audio)
	            for _, _, audio in pipeline(clean_text, voice=voice, speed=speed)
	            if audio is not None
	        ]
	        if not segments:
	            raise ValueError("Kokoro produced no audio")
	        output_path = OUTPUT_DIR / f"speech-{uuid4().hex}.wav"
	        sf.write(output_path, np.concatenate(segments), 24000)
	        runtime = "kokoro"
	        warning = None
	    except Exception as exc:
	        # Deliberately broad: any model/runtime failure must still return audio.
	        output_path = OUTPUT_DIR / f"speech-fallback-{uuid4().hex}.wav"
	        _silent_wav(output_path)
	        runtime = "fallback"
	        warning = f"Kokoro unavailable: {exc.__class__.__name__}"

	    _prune_speech_outputs(output_path)

	    return {
	        "ok": True,
	        "runtime": runtime,
	        "model": "hexgrad/Kokoro-82M",
	        "warning": warning,
	        "audio_url": f"/outputs/{output_path.name}",
	        "transcript": clean_text,
	        "elapsed_ms": _elapsed_ms(start),
	    }


	def _call_modal_klein(prompt: str, seed: int | None) -> dict[str, Any]:
	    """Call the Modal Klein worker for live image generation.

	    Transport and JSON errors are logged and re-raised. Raises ``ValueError``
	    when the worker reports ``ok`` other than True, an unexpected model or
	    runtime, or no ``image_url``. A ``/media/...`` image path is made absolute
	    against the worker base URL.
	    """
	    start = time.perf_counter()
	    body = json.dumps({"prompt": prompt, "seed": seed}).encode("utf-8")
	    headers: dict[str, str] = {"Content-Type": "application/json"}
	    if KLEIN_MODAL_TOKEN:
	        headers["Authorization"] = f"Bearer {KLEIN_MODAL_TOKEN}"
	    endpoint = _modal_klein_base_url()
	    generate_url = f"{endpoint}/generate"
	    token_state = "configured" if KLEIN_MODAL_TOKEN else "not-set"
	    _runtime_log(f"Modal Klein generate request url={generate_url} token={token_state} seed={seed}")
	    request = urllib.request.Request(generate_url, data=body, headers=headers, method="POST")
	    try:
	        with urllib.request.urlopen(request, timeout=KLEIN_MODAL_TIMEOUT_SECONDS) as response:
	            payload = json.loads(response.read().decode("utf-8"))
	    except (OSError, urllib.error.URLError, TimeoutError, json.JSONDecodeError) as exc:
	        detail = _http_exception_detail(exc)
	        _runtime_log(f"Modal Klein generate failed url={generate_url} error={detail}")
	        raise

	    if payload.get("ok") is not True:
	        _runtime_log(f"Modal Klein generate returned ok=false payload={_compact_text(json.dumps(payload), 400)}")
	        raise ValueError(str(payload.get("error") or payload.get("detail") or "Modal Klein returned ok=false"))

	    # The worker must identify itself with the expected model and runtime.
	    expectations = (
	        ("model", MODEL_MANIFEST["image_generation"]["id"]),
	        ("runtime", "modal-klein"),
	    )
	    for field, expected in expectations:
	        reported = payload.get(field)
	        if reported != expected:
	            _runtime_log(f"Modal Klein generate returned unexpected {field}={reported}")
	            raise ValueError(f"Modal Klein returned unexpected {field}")

	    image_url = payload.get("image_url", "")
	    if not (isinstance(image_url, str) and image_url):
	        _runtime_log("Modal Klein generate response did not include image_url")
	        raise ValueError("Modal Klein response did not include image_url")
	    if image_url.startswith("/media/"):
	        image_url = f"{endpoint}{image_url}"
	    _runtime_log(f"Modal Klein generate succeeded runtime={payload.get('runtime')} image_url={image_url}")
	    return {
	        "ok": True,
	        "runtime": payload.get("runtime", "modal-klein"),
	        "model": payload.get("model", MODEL_MANIFEST["image_generation"]["id"]),
	        "image_url": image_url,
	        "prompt": payload.get("prompt", prompt),
	        "seed": payload.get("seed", seed),
	        "elapsed_ms": payload.get("elapsed_ms", _elapsed_ms(start)),
	    }


	def _fallback_image(prompt: str, seed: int | None, warning: str | None = None) -> dict[str, Any]:
	    """Return bundled fallback SVG assets when Modal is unavailable.

	    Odd seeds select ``desk-reader.svg``; even, zero or missing seeds select
	    ``model-map.svg``.
	    """
	    start = time.perf_counter()
	    if seed and seed % 2:
	        asset_name = "desk-reader.svg"
	    else:
	        asset_name = "model-map.svg"
	    return {
	        "ok": True,
	        "runtime": "fallback",
	        "model": MODEL_MANIFEST["image_generation"]["id"],
	        "image_url": f"/static/generated/{asset_name}",
	        "prompt": prompt,
	        "seed": seed,
	        "warning": warning,
	        "elapsed_ms": _elapsed_ms(start),
	    }


	def generate_image_core(prompt: str, seed: int | None) -> dict[str, Any]:
	    """Generate an image through Modal Klein, or return a bundled fallback asset.

	    The fallback is used when KLEIN_MODAL_ENDPOINT is not configured (logged, no
	    warning) or when the Modal call fails (warning carries the failure detail).
	    """
	    if not KLEIN_MODAL_ENDPOINT:
	        _runtime_log("Modal Klein generate skipped because KLEIN_MODAL_ENDPOINT is not configured")
	        return _fallback_image(prompt, seed)
	    try:
	        return _call_modal_klein(prompt, seed)
	    except (OSError, urllib.error.URLError, TimeoutError, ValueError, KeyError, json.JSONDecodeError) as exc:
	        failure = f"Modal Klein unavailable: {_http_exception_detail(exc)}"
	        return _fallback_image(prompt, seed, warning=failure)


	def _thumbnail_image_prompt(topic: str) -> str:
	return (
	f"Square image-only editorial thumbnail about {topic}. "
	"Create one centered cohesive scene: a friendly assistive robot or small AI helper guiding a reader "
	"through visual information with light, sound waves, image cards, and accessibility cues. "
	"Fill most of the frame with the subject while leaving a small clean margin. "
	"Modern polished 3D editorial illustration, soft gradients, warm blue and teal accents, clear focal point. "
	"No words, no letters, no numbers, no captions, no title, no logo, no watermark, "
	"no user interface, no screenshot, no document page, no poster layout, no split panels."
	)


	def _article_from_generated(generated: Any) -> dict[str, Any] | None:
	    """Return a length-limited article built from model output, or None if its shape is wrong."""
	    if not isinstance(generated, dict):
	        return None
	    title = generated.get("title")
	    dek = generated.get("dek")
	    sections = generated.get("sections", [])
	    if not (isinstance(title, str) and isinstance(dek, str)):
	        return None
	    if not (isinstance(sections, list) and len(sections) == 5):
	        return None
	    for item in sections:
	        if not isinstance(item, dict):
	            return None
	        if not (isinstance(item.get("heading"), str) and isinstance(item.get("body"), str)):
	            return None
	    return {
	        "title": _compact_text(title, 90),
	        "dek": _compact_text(dek, 180),
	        "sections": [
	            {"heading": _compact_text(item["heading"], 80), "body": _compact_text(item["body"], 720)}
	            for item in sections
	        ],
	    }


	def generate_article_core(topic: str) -> dict[str, Any]:
	    """Generate an article draft and thumbnail for ``topic``.

	    The topic is compacted to 100 characters (defaulting to "accessible
	    technology" when empty). llama.cpp is asked for a JSON draft with a title,
	    dek and exactly five sections; if it is not configured, the request fails,
	    or the draft has the wrong shape, the built-in fallback article is used and
	    ``warning`` says why. The response always has ``ok: True``.
	    """
	    start = time.perf_counter()
	    clean_topic = _compact_text(topic, 100).strip()
	    if not clean_topic:
	        clean_topic = "accessible technology"

	    article = _fallback_article(clean_topic)
	    runtime = "fallback"
	    warning = None
	    if not LLAMA_CPP_BASE_URL:
	        warning = "llama.cpp unavailable: LLAMA_CPP_BASE_URL is not configured"
	    else:
	        try:
	            body = json.dumps(
	                {
	                    "model": LLAMA_CPP_MODEL,
	                    "messages": [
	                        {
	                            "role": "system",
	                            "content": (
	                                "You are Tiny Narrator's article generator. "
	                                "Create concise, semantic, accessible article drafts."
	                            ),
	                        },
	                        {"role": "user", "content": _article_generation_prompt(clean_topic)},
	                    ],
	                    "temperature": 0.35,
	                    "max_tokens": 650,
	                }
	            ).encode("utf-8")
	            request = urllib.request.Request(
	                f"{LLAMA_CPP_BASE_URL}/chat/completions",
	                data=body,
	                headers=_llama_cpp_headers({"Content-Type": "application/json"}),
	                method="POST",
	            )
	            with urllib.request.urlopen(request, timeout=LLAMA_CPP_TIMEOUT_SECONDS) as response:
	                payload = json.loads(response.read().decode("utf-8"))
	            generated = json.loads(payload["choices"][0]["message"]["content"].strip())
	            draft = _article_from_generated(generated)
	            if draft is None:
	                # Say why the fallback article is being served instead of
	                # failing silently.
	                warning = "llama.cpp returned an article draft with an unexpected shape"
	            else:
	                article = draft
	                runtime = "llama.cpp"
	        except (
	            OSError,
	            urllib.error.URLError,
	            TimeoutError,
	            ValueError,
	            KeyError,
	            IndexError,
	            TypeError,
	            AttributeError,
	            json.JSONDecodeError,
	        ) as exc:
	            # IndexError/TypeError/AttributeError cover empty "choices" and
	            # non-string content, so a malformed completion also falls back.
	            warning = f"llama.cpp unavailable: {exc.__class__.__name__}"

	    image_prompt = _thumbnail_image_prompt(clean_topic)
	    thumbnail = generate_image_core(image_prompt, seed=_topic_seed(clean_topic))
	    return {
	        "ok": True,
	        "topic": clean_topic,
	        "runtime": runtime,
	        "model": MODEL_MANIFEST["reader_brain"]["id"],
	        "warning": warning,
	        "article": article,
	        "thumbnail": {
	            **thumbnail,
	            "role": "thumbnail",
	            "generation_model": MODEL_MANIFEST["image_generation"]["id"],
	        },
	        "elapsed_ms": _elapsed_ms(start),
	    }


	@app.get("/", response_class=HTMLResponse)
	async def home() -> str:
	    # Serve the index page markup straight from the static directory.
	    index_page = STATIC_DIR / "index.html"
	    return index_page.read_text(encoding="utf-8")


	@app.get("/generate", response_class=HTMLResponse)
	async def generate_page() -> str:
	    # Serve the generator page markup straight from the static directory.
	    generator_page = STATIC_DIR / "generate.html"
	    return generator_page.read_text(encoding="utf-8")


	@app.get("/api/health")
	async def health() -> JSONResponse:
	    # Static health report: app identity, configured base URLs and the model manifest.
	    report = {
	        "ok": True,
	        "app": "Tiny Narrator",
	        "frontend": "custom Gradio Server HTML/CSS/JS",
	        "llama_cpp_base_url": LLAMA_CPP_BASE_URL,
	        "public_base_url": PUBLIC_BASE_URL,
	        "models": MODEL_MANIFEST,
	    }
	    return _json(report)


	@app.get("/api/article-manifest")
	async def article_manifest() -> JSONResponse:
	    # Return ARTICLE_MANIFEST's fields alongside an "ok" flag.
	    manifest = {"ok": True, **ARTICLE_MANIFEST}
	    return _json(manifest)


	@app.get("/api/award-evidence")
	async def award_evidence() -> JSONResponse:
	    # Return AWARD_EVIDENCE under "items" alongside an "ok" flag.
	    evidence = {"ok": True, "items": AWARD_EVIDENCE}
	    return _json(evidence)


	@app.get("/api/model-budget")
	async def model_budget() -> JSONResponse:
	    # Serve the payload built by model_budget_core() as JSON.
	    budget = model_budget_core()
	    return _json(budget)


	@app.get("/api/runtime-setup")
	async def runtime_setup() -> JSONResponse:
	    # Serve the payload built by runtime_setup_core() as JSON.
	    setup = runtime_setup_core()
	    return _json(setup)


	@app.get("/api/demo-script")
	async def demo_script() -> JSONResponse:
	    # Serve the payload built by demo_script_core() as JSON.
	    script = demo_script_core()
	    return _json(script)


	@app.get("/api/accessibility-audit")
	async def accessibility_audit() -> JSONResponse:
	    # Serve the payload built by accessibility_audit_core() as JSON.
	    audit = accessibility_audit_core()
	    return _json(audit)


	@app.get("/api/submission-readiness")
	async def submission_readiness() -> JSONResponse:
	    # Serve the payload built by submission_readiness_core() as JSON.
	    readiness = submission_readiness_core()
	    return _json(readiness)


	@app.get("/api/evidence-bundle")
	async def evidence_bundle() -> JSONResponse:
	    # Serve the payload built by evidence_bundle_core() as JSON.
	    bundle = evidence_bundle_core()
	    return _json(bundle)


	@app.get("/api/runtime-status")
	async def runtime_status() -> JSONResponse:
	    # Serve the payload built by _runtime_status_core() as JSON.
	    status = _runtime_status_core()
	    return _json(status)


	@app.post("/api/reader-brain")
	async def reader_brain_endpoint(payload: ReaderBrainRequest) -> JSONResponse:
	    # reader_brain_core makes blocking urllib requests (bounded only by the
	    # configured timeouts). Run it in a worker thread so this coroutine does
	    # not stall the event loop for other requests.
	    import asyncio

	    result = await asyncio.to_thread(
	        reader_brain_core,
	        payload.node_type,
	        payload.text,
	        payload.position,
	        payload.mode,
	    )
	    return _json(result)


	@app.post("/api/describe-image")
	async def describe_image_endpoint(payload: ImageDescriptionRequest) -> JSONResponse:
	    # describe_image_core may read a local file and make a blocking urllib
	    # request. Run it in a worker thread so the event loop stays responsive.
	    import asyncio

	    result = await asyncio.to_thread(
	        describe_image_core,
	        payload.image_id,
	        payload.caption,
	        payload.prompt,
	        payload.image_url,
	    )
	    return _json(result)


	@app.get("/api/image-descriptions")
	async def image_descriptions_endpoint() -> JSONResponse:
	    # describe_article_images_core describes every article image in sequence,
	    # each possibly via a blocking urllib request. Run it in a worker thread
	    # so the event loop stays responsive.
	    import asyncio

	    result = await asyncio.to_thread(describe_article_images_core)
	    return _json(result)


	@app.post("/api/speak")
	async def speak_endpoint(payload: SpeechRequest) -> JSONResponse:
	    # speak_core runs speech synthesis and file I/O synchronously. Run it in a
	    # worker thread so the event loop stays responsive.
	    import asyncio

	    result = await asyncio.to_thread(speak_core, payload.text, payload.voice, payload.speed)
	    return _json(result)


	@app.post("/api/generate-image")
	async def generate_image_endpoint(payload: ImageGenerationRequest) -> JSONResponse:
	    # generate_image_core may make a blocking urllib request to the Modal
	    # worker. Run it in a worker thread so the event loop stays responsive.
	    import asyncio

	    result = await asyncio.to_thread(generate_image_core, payload.prompt, payload.seed)
	    return _json(result)


	@app.post("/api/generate-article")
	async def generate_article_endpoint(payload: ArticleGenerationRequest) -> JSONResponse:
	    # generate_article_core makes blocking urllib requests (text model, then
	    # thumbnail). Run it in a worker thread so the event loop stays responsive.
	    import asyncio

	    result = await asyncio.to_thread(generate_article_core, payload.topic)
	    return _json(result)


	@app.api(name="reader_brain")
	def reader_brain_api(node_type: str, text: str, position: str = "", mode: str = "narrate") -> str:
	    # Run reader_brain_core and serialize its result dict to a JSON string.
	    result = reader_brain_core(node_type, text, position, mode)
	    return json.dumps(result)


	@app.api(name="describe_image")
	def describe_image_api(image_id: str, caption: str = "", prompt: str = "", image_url: str = "") -> str:
	    # Run describe_image_core and serialize its result dict to a JSON string.
	    result = describe_image_core(image_id, caption, prompt, image_url)
	    return json.dumps(result)


	@app.api(name="describe_article_images")
	def describe_article_images_api() -> str:
	    # Run describe_article_images_core and serialize its result to a JSON string.
	    result = describe_article_images_core()
	    return json.dumps(result)


	@app.api(name="model_budget")
	def model_budget_api() -> str:
	    # Serialize the model_budget_core() payload to a JSON string.
	    budget = model_budget_core()
	    return json.dumps(budget)


	@app.api(name="runtime_setup")
	def runtime_setup_api() -> str:
	    # Serialize the runtime_setup_core() payload to a JSON string.
	    setup = runtime_setup_core()
	    return json.dumps(setup)


	@app.api(name="demo_script")
	def demo_script_api() -> str:
	    # Serialize the demo_script_core() payload to a JSON string.
	    script = demo_script_core()
	    return json.dumps(script)


	@app.api(name="accessibility_audit")
	def accessibility_audit_api() -> str:
	    # Serialize the accessibility_audit_core() payload to a JSON string.
	    audit = accessibility_audit_core()
	    return json.dumps(audit)


	@app.api(name="submission_readiness")
	def submission_readiness_api() -> str:
	    # Serialize the submission_readiness_core() payload to a JSON string.
	    readiness = submission_readiness_core()
	    return json.dumps(readiness)


	@app.api(name="evidence_bundle")
	def evidence_bundle_api() -> str:
	    # Serialize the evidence_bundle_core() payload to a JSON string.
	    bundle = evidence_bundle_core()
	    return json.dumps(bundle)


	@app.api(name="speak")
	def speak_api(text: str, voice: str = "af_heart", speed: float = 1.0) -> str:
	    # Run speak_core and serialize its result dict to a JSON string.
	    result = speak_core(text, voice, speed)
	    return json.dumps(result)


	@app.api(name="generate_image")
	def generate_image_api(prompt: str, seed: int | None = None) -> str:
	    # Run generate_image_core and serialize its result dict to a JSON string.
	    result = generate_image_core(prompt, seed)
	    return json.dumps(result)


	@app.api(name="generate_article")
	def generate_article_api(topic: str) -> str:
	    # Run generate_article_core and serialize its result dict to a JSON string.
	    result = generate_article_core(topic)
	    return json.dumps(result)


	@app.exception_handler(Exception)
	async def handle_exception(_: Request, exc: Exception) -> JSONResponse:
	    # Last-resort handler: report the exception class and message as a JSON 500.
	    error_body = {
	        "ok": False,
	        "error": exc.__class__.__name__,
	        "detail": str(exc),
	    }
	    return _json(error_body, status_code=500)


	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch(
	        server_name=GRADIO_SERVER_NAME,
	        server_port=GRADIO_SERVER_PORT,
	        share=GRADIO_SHARE,
	    )