multi-agent-lab / src /media /tools.py
agharsallah
feat(media): introduce MediaRouter and stubs for image and speech generation
8400d8c
Raw
History Blame Contribute Delete
2.8 kB
"""Wire image/speech generation into the :class:`ToolRegistry` as capability-checked tools.
The commentator (and only an agent granted them) calls ``image.render`` / ``tts.speak``
exactly like ``oracle`` — the registry enforces the manifest grant first (ADR-0012). Each
tool calls the :class:`MediaRouter` then applies the **hybrid transport**: a stub artifact
is inlined as a ``data:`` URI (self-contained, keeps the no-key demo and the exported
trace working with no files); a live artifact is written under the run's media dir and
referenced by a ``/file=`` URL (so the exported ledger/trace stays lean β€” just a path).
"""
from __future__ import annotations
import re
from pathlib import Path
from src.media.provider import MediaResult
from src.media.router import MediaRouter
from src.tools.registry import ToolRegistry
_UNSAFE = re.compile(r"[^A-Za-z0-9._-]+")
def _safe(part: str) -> str:
return _UNSAFE.sub("_", part or "x")[:80] or "x"
def _to_ref(result: MediaResult, *, media_dir: Path | None, run_id: str, slug: str) -> dict:
    """Turn a :class:`MediaResult` into a JSON-serialisable feed ref via hybrid transport.

    The ref always carries ``mime``, ``model_id`` and a copy of ``usage``. Its ``src``
    is either an inline ``data:`` URI (when ``model_id`` starts with ``stub:`` or
    *media_dir* is ``None``) or a ``/file=<resolved path>`` URL pointing at a file
    written to ``<media_dir>/<run>/<slug>.<ext>``.

    Args:
        result: Generated artifact; read for ``mime``, ``model_id``, ``usage``,
            ``data`` and ``data_uri()``.
        media_dir: Base directory for written artifacts, or ``None`` to always inline.
        run_id: Names the per-run subdirectory (sanitised; empty falls back to ``"run"``).
        slug: Names the file stem (sanitised).

    Returns:
        The feed ref dict.

    Raises:
        OSError: If the run directory or the artifact file cannot be written.
    """
    # Extension = MIME subtype without parameters ("audio/mpeg ; q=1" -> "mpeg").
    # Strip whitespace so a space before ";" does not end up in the filename.
    subtype = result.mime.split("/")[-1].split(";")[0].strip()
    ext = subtype or "bin"
    ref = {"mime": result.mime, "model_id": result.model_id, "usage": dict(result.usage)}

    # Stub output (or no writable dir) → inline; live output → a served file.
    if result.model_id.startswith("stub:") or media_dir is None:
        ref["src"] = result.data_uri()
        return ref

    run_part = _safe(run_id or "run")
    # Never let the run directory be "." or "..": that would alias or escape media_dir.
    if run_part in (".", ".."):
        run_part = "run"
    out_dir = Path(media_dir) / run_part
    out_dir.mkdir(parents=True, exist_ok=True)

    path = (out_dir / f"{_safe(slug)}.{ext}").resolve()
    path.write_bytes(result.data)
    ref["src"] = f"/file={path}"
    return ref
def register_media_tools(registry: ToolRegistry, router: MediaRouter, media_dir: Path | None = None) -> None:
    """Register ``image.render`` and ``tts.speak`` on *registry*.

    Each tool asks *router* for a provider when it is called, generates the
    artifact, and returns the feed ref built by ``_to_ref`` for *media_dir*.
    Unknown keyword arguments passed to a tool are accepted and ignored.
    """

    def _image(prompt: str = "", run_id: str = "", slug: str = "", style: str = "", **_: object) -> dict:
        # A falsy style is forwarded as None; a falsy slug defaults to "img".
        provider = router.image_for()
        artifact = provider.generate(str(prompt), style=style if style else None)
        return _to_ref(artifact, media_dir=media_dir, run_id=run_id, slug=slug if slug else "img")

    def _speak(text: str = "", run_id: str = "", slug: str = "", voice: str = "", **_: object) -> dict:
        # A falsy voice is forwarded as None; a falsy slug defaults to "tts".
        provider = router.speech_for()
        artifact = provider.synthesize(str(text), voice=voice if voice else None)
        return _to_ref(artifact, media_dir=media_dir, run_id=run_id, slug=slug if slug else "tts")

    # (tool name, description, handler) — registered in this order.
    tools = (
        (
            "image.render",
            "Draw an illustration of the current beat. Params: {prompt: str}.",
            _image,
        ),
        (
            "tts.speak",
            "Speak a line aloud as audio. Params: {text: str}.",
            _speak,
        ),
    )
    for tool_name, tool_description, handler in tools:
        registry.register(tool_name, description=tool_description, run=handler)