Spaces:
Running
Running
| """ | |
| Knowledge Universe — Streaming Adapter | |
| Produces streaming endpoint manifests for video and audio sources. | |
| Useful for media players and transcript fetchers. | |
| """ | |
import shlex
from typing import Any, Dict
from urllib.parse import parse_qs, quote, urlsplit

from src.api.models import Source
from src.format_adapters.base_adapter import BaseFormatAdapter
class StreamingAdapter(BaseFormatAdapter):
    """
    Build streaming manifests for media sources.

    A manifest is a plain dict describing how to stream or access a
    source's media (video, audio, podcast), plus hints on how a transcript
    can be obtained.
    Downstream: media players, transcript pipelines, caption extractors.
    """

    # Link formats that are listed under "streams" in the manifest.
    _STREAM_FORMATS = ("video", "audio", "podcast", "live", "animation")
    # Formats that may be reported as the manifest's "media_type".
    _PRIMARY_FORMATS = ("video", "audio", "podcast")

    def transform(self, source: Source) -> Dict[str, Any]:
        """
        Convert *source* into a streaming manifest.

        Returns:
            A dict with the manifest version, identifying fields, one
            "streams" entry per media link, descriptive "metadata" and
            "transcript_options".
        """
        media_links = [
            link
            for link in source.links
            if link.format.value in self._STREAM_FORMATS
        ]
        # When no link has a streamable format, fall back to the first
        # link (if there is one) instead of returning no streams at all.
        stream_links = media_links or source.links[:1]

        return {
            "manifest_version": "1.0",
            "source_id": source.id,
            "title": source.title,
            "platform": source.source_platform,
            "media_type": self._primary_media_type(source),
            "streams": [
                {
                    "url": link.url,
                    "format": link.format.value,
                    "access_method": link.access_method,
                    "quality": "auto",
                }
                for link in stream_links
            ],
            "metadata": {
                "duration_seconds": source.duration_seconds,
                "language": source.language,
                "thumbnail": source.thumbnail_url,
                "authors": source.authors,
                "open_access": source.open_access,
            },
            "transcript_options": self._transcript_options(source),
        }

    def _primary_media_type(self, source: Source) -> str:
        """Return the first video/audio/podcast format of *source*, else "media"."""
        for fmt in source.formats:
            if fmt.value in self._PRIMARY_FORMATS:
                return fmt.value
        return "media"

    def _transcript_options(self, source: Source) -> Dict[str, Any]:
        """
        Describe how to get a transcript if available.

        YouTube sources get a yt-dlp command and, when a video ID can be
        extracted from the URL, a timed-text caption URL. Podcast sources
        are marked available only if one of their links has the
        "transcript" format. Every other platform yields
        ``{"available": False}``.
        """
        if source.source_platform == "youtube":
            # Normalise once so URL objects that are not plain ``str`` are
            # handled the same way as strings.
            url = str(source.url)
            vid_id = self._youtube_video_id(url)
            return {
                "available": True,
                "method": "yt-dlp",
                # The URL is shell-quoted: "&" and "?" are shell
                # metacharacters, so an unquoted watch URL would be split
                # or glob-expanded when the command is pasted into a shell.
                "command": (
                    "yt-dlp --write-auto-sub --skip-download "
                    f"{shlex.quote(url)}"
                ),
                "caption_url": (
                    "https://www.youtube.com/api/timedtext?lang=en&v="
                    f"{quote(vid_id, safe='')}"
                    if vid_id
                    else None
                ),
            }

        if source.source_platform == "podcast":
            return {
                "available": any(
                    link.format.value == "transcript"
                    for link in source.links
                ),
                "method": "rss_feed",
                "note": "Check RSS feed for embedded transcript or SRT file",
            }

        return {"available": False}

    @staticmethod
    def _youtube_video_id(url: str) -> str:
        """
        Return the video ID embedded in a YouTube URL, or "" if none is found.

        Handles watch URLs (``...?v=<id>&...``) and short links
        (``youtu.be/<id>``), ignoring any further path segments, query
        parameters or fragment that follow the ID.
        """
        try:
            query = urlsplit(url).query
        except ValueError:
            # Malformed URL (e.g. broken IPv6 literal): no query to inspect.
            query = ""

        # Watch URLs carry the ID in the "v" query parameter. Parsing the
        # query avoids matching "v=" inside other parameters (e.g. "rev=").
        ids = parse_qs(query).get("v")
        if ids:
            return ids[0]

        # Short links: the ID is whatever directly follows "youtu.be/".
        _, marker, tail = url.rpartition("youtu.be/")
        if not marker:
            return ""
        for separator in "/?&#":
            tail = tail.partition(separator)[0]
        return tail