""" Knowledge Universe — Format Adapter Factory Central registry for all output format adapters. Supported output formats: json → standard JSON dict (default) html → rich HTML cards with format-aware rendering pdf → PDF manifest for document processing pipelines streaming → streaming manifest for video/audio embeddings → vector embeddings for RAG (384-dim, all-MiniLM-L6-v2) """ import logging from typing import Any from src.api.models import Source from src.format_adapters.json_adapter import JSONFormatAdapter from src.format_adapters.html_adapter import HTMLFormatAdapter from src.format_adapters.pdf_adapter import PDFManifestAdapter from src.format_adapters.streaming_adapter import StreamingAdapter from src.format_adapters.embedding_adapter import EmbeddingAdapter logger = logging.getLogger(__name__) class FormatAdapterFactory: """ Factory for all output format adapters. Single entry point: FormatAdapterFactory.get_adapter("json") """ # Registry: output format name → adapter instance _adapters = { "json": JSONFormatAdapter(), "html": HTMLFormatAdapter(), "pdf": PDFManifestAdapter(), "streaming": StreamingAdapter(), "embeddings": EmbeddingAdapter(), # Aliases "embed": EmbeddingAdapter(), "manifest": PDFManifestAdapter(), "stream": StreamingAdapter(), } @classmethod def get_adapter(cls, format_type: str): """ Get adapter for output format. Args: format_type: "json" | "html" | "pdf" | "streaming" | "embeddings" Returns: Adapter instance with .transform(source) method. """ adapter = cls._adapters.get(format_type.lower()) if not adapter: logger.warning( f"Unknown output format '{format_type}', defaulting to JSON. " f"Available: {list(cls._adapters.keys())}" ) return cls._adapters["json"] return adapter @classmethod def transform(cls, source: Source, format_type: str) -> Any: """Transform a single source to the specified output format.""" return cls.get_adapter(format_type).transform(source) @classmethod def transform_many(cls, sources: list, format_type: str) -> list: """Transform multiple sources — uses batch optimization where available.""" return cls.get_adapter(format_type).transform_many(sources) @classmethod def available_formats(cls) -> list: """Return list of unique adapter format names (no aliases).""" return ["json", "html", "pdf", "streaming", "embeddings"]