""" Knowledge Universe — Streaming Adapter Produces streaming endpoint manifests for video and audio sources. Useful for media players and transcript fetchers. """ from typing import Any, Dict from src.api.models import Source from src.format_adapters.base_adapter import BaseFormatAdapter class StreamingAdapter(BaseFormatAdapter): """ Returns a streaming manifest — structured metadata describing how to stream or access media (video, audio, podcast). Downstream: media players, transcript pipelines, caption extractors. """ def transform(self, source: Source) -> Dict[str, Any]: media_links = [ link for link in source.links if link.format.value in ("video", "audio", "podcast", "live", "animation") ] return { "manifest_version": "1.0", "source_id": source.id, "title": source.title, "platform": source.source_platform, "media_type": self._primary_media_type(source), "streams": [ { "url": link.url, "format": link.format.value, "access_method": link.access_method, "quality": "auto", } for link in (media_links or source.links[:1]) ], "metadata": { "duration_seconds": source.duration_seconds, "language": source.language, "thumbnail": source.thumbnail_url, "authors": source.authors, "open_access": source.open_access, }, "transcript_options": self._transcript_options(source), } def _primary_media_type(self, source: Source) -> str: for fmt in source.formats: if fmt.value in ("video", "audio", "podcast"): return fmt.value return "media" def _transcript_options(self, source: Source) -> Dict[str, Any]: """Describe how to get a transcript if available.""" if source.source_platform == "youtube": # Extract video ID from URL vid_id = "" if "v=" in source.url: vid_id = source.url.split("v=")[-1].split("&")[0] elif "youtu.be/" in source.url: vid_id = source.url.split("youtu.be/")[-1] return { "available": True, "method": "yt-dlp", "command": f"yt-dlp --write-auto-sub --skip-download {source.url}", "caption_url": f"https://www.youtube.com/api/timedtext?lang=en&v={vid_id}" if vid_id else None, } if source.source_platform == "podcast": return { "available": bool(any( link.format.value == "transcript" for link in source.links )), "method": "rss_feed", "note": "Check RSS feed for embedded transcript or SRT file", } return {"available": False}