Knowledge-Universe / src /format_adapters /streaming_adapter.py
vlsiddarth's picture
Commit latest version with ranking logic and API fixes
0733aae
"""
Knowledge Universe — Streaming Adapter
Produces streaming endpoint manifests for video and audio sources.
Useful for media players and transcript fetchers.
"""
import shlex
from typing import Any, Dict
from urllib.parse import parse_qs, quote, urlparse

from src.api.models import Source
from src.format_adapters.base_adapter import BaseFormatAdapter
class StreamingAdapter(BaseFormatAdapter):
    """
    Returns a streaming manifest — structured metadata describing how
    to stream or access media (video, audio, podcast).

    Downstream: media players, transcript pipelines, caption extractors.
    """

    # Link formats that are listed under "streams" in the manifest.
    _STREAMABLE_FORMATS = ("video", "audio", "podcast", "live", "animation")
    # Source formats that may be reported as the manifest's "media_type".
    _PRIMARY_MEDIA_TYPES = ("video", "audio", "podcast")
    # YouTube path layouts of the form /<prefix>/<video id>.
    _YOUTUBE_PATH_PREFIXES = ("embed", "shorts", "live")

    def transform(self, source: Source) -> Dict[str, Any]:
        """Build the streaming manifest for *source*.

        Args:
            source: The source whose links and metadata are described.

        Returns:
            A dict with the keys ``manifest_version``, ``source_id``,
            ``title``, ``platform``, ``media_type``, ``streams``,
            ``metadata`` and ``transcript_options``.
        """
        media_links = [
            link for link in source.links
            if link.format.value in self._STREAMABLE_FORMATS
        ]
        # When no link has a streamable format, fall back to the first link
        # (if any) so the manifest still points at something.
        stream_links = media_links or source.links[:1]

        return {
            "manifest_version": "1.0",
            "source_id": source.id,
            "title": source.title,
            "platform": source.source_platform,
            "media_type": self._primary_media_type(source),
            "streams": [
                {
                    "url": link.url,
                    "format": link.format.value,
                    "access_method": link.access_method,
                    "quality": "auto",
                }
                for link in stream_links
            ],
            "metadata": {
                "duration_seconds": source.duration_seconds,
                "language": source.language,
                "thumbnail": source.thumbnail_url,
                "authors": source.authors,
                "open_access": source.open_access,
            },
            "transcript_options": self._transcript_options(source),
        }

    def _primary_media_type(self, source: Source) -> str:
        """Return the first video/audio/podcast format of *source*.

        Falls back to the generic label ``"media"`` when none of the
        source's formats is one of those three.
        """
        for fmt in source.formats:
            if fmt.value in self._PRIMARY_MEDIA_TYPES:
                return fmt.value
        return "media"

    def _transcript_options(self, source: Source) -> Dict[str, Any]:
        """Describe how to get a transcript if available.

        Returns:
            For the ``"youtube"`` platform: a yt-dlp command plus a
            ``caption_url`` (``None`` when no video ID can be extracted).
            For the ``"podcast"`` platform: availability based on whether
            any link has the ``"transcript"`` format.
            Otherwise ``{"available": False}``.
        """
        platform = source.source_platform

        if platform == "youtube":
            url = str(source.url) if source.url else ""
            video_id = self._youtube_video_id(url)

            caption_url = None
            if video_id:
                caption_url = (
                    "https://www.youtube.com/api/timedtext?lang=en&v="
                    + quote(video_id, safe="")
                )

            return {
                "available": True,
                "method": "yt-dlp",
                # The URL is shell-quoted: YouTube URLs routinely contain
                # "&" and "?", which a shell would otherwise interpret
                # (backgrounding the command / globbing) when the command
                # is pasted or executed.
                "command": (
                    "yt-dlp --write-auto-sub --skip-download "
                    f"{shlex.quote(url)}"
                ),
                "caption_url": caption_url,
            }

        if platform == "podcast":
            return {
                "available": any(
                    link.format.value == "transcript"
                    for link in source.links
                ),
                "method": "rss_feed",
                "note": "Check RSS feed for embedded transcript or SRT file",
            }

        return {"available": False}

    @staticmethod
    def _youtube_video_id(url: str) -> str:
        """Extract the video ID from a YouTube URL, or return ``""``.

        Handles ``watch?v=<id>`` (the ``v`` parameter in any position),
        ``youtu.be/<id>`` and ``/embed/<id>``, ``/shorts/<id>``,
        ``/live/<id>``. Trailing query strings and fragments are never
        included in the returned ID.
        """
        if not url:
            return ""

        try:
            # urlparse only recognises the host when the "//" authority
            # marker is present, so add it for scheme-less URLs.
            parsed = urlparse(url if "//" in url else f"//{url}")
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): no ID available.
            return ""

        ids_from_query = parse_qs(parsed.query).get("v")
        if ids_from_query:
            return ids_from_query[0]

        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]

        if host == "youtu.be" or host.endswith(".youtu.be"):
            return segments[0] if segments else ""

        if (
            len(segments) >= 2
            and segments[0] in StreamingAdapter._YOUTUBE_PATH_PREFIXES
        ):
            return segments[1]

        return ""