Spaces:
Running
Running
| """ | |
| Knowledge Universe — Format Adapter Factory | |
| Central registry for all output format adapters. | |
| Supported output formats: | |
| json → standard JSON dict (default) | |
| html → rich HTML cards with format-aware rendering | |
| pdf → PDF manifest for document processing pipelines | |
| streaming → streaming manifest for video/audio | |
| embeddings → vector embeddings for RAG (384-dim, all-MiniLM-L6-v2) | |
| """ | |
| import logging | |
| from typing import Any | |
| from src.api.models import Source | |
| from src.format_adapters.json_adapter import JSONFormatAdapter | |
| from src.format_adapters.html_adapter import HTMLFormatAdapter | |
| from src.format_adapters.pdf_adapter import PDFManifestAdapter | |
| from src.format_adapters.streaming_adapter import StreamingAdapter | |
| from src.format_adapters.embedding_adapter import EmbeddingAdapter | |
| logger = logging.getLogger(__name__) | |
class FormatAdapterFactory:
    """
    Factory and registry for all output format adapters.

    Every method is a classmethod, so the factory is used without being
    instantiated, e.g. ``FormatAdapterFactory.get_adapter("json")``.
    """

    # Registry: output format name -> adapter instance.
    # Instances are created once, when this class body executes, and are
    # shared by every caller.
    _adapters = {
        "json": JSONFormatAdapter(),
        "html": HTMLFormatAdapter(),
        "pdf": PDFManifestAdapter(),
        "streaming": StreamingAdapter(),
        "embeddings": EmbeddingAdapter(),
    }
    # Aliases point at the canonical instance rather than constructing a
    # second copy of the same adapter.
    _adapters["embed"] = _adapters["embeddings"]
    _adapters["manifest"] = _adapters["pdf"]
    _adapters["stream"] = _adapters["streaming"]

    # Canonical format names (aliases excluded), in display order.
    _canonical_formats = ("json", "html", "pdf", "streaming", "embeddings")

    @classmethod
    def get_adapter(cls, format_type: str) -> Any:
        """
        Return the adapter registered for an output format.

        The lookup is case-insensitive and ignores surrounding whitespace.
        Unknown names (and non-string values such as ``None``) log a warning
        and fall back to the JSON adapter instead of raising.

        Args:
            format_type: "json" | "html" | "pdf" | "streaming" | "embeddings",
                or one of the aliases "embed" | "manifest" | "stream".

        Returns:
            The registered adapter instance; the JSON adapter when
            ``format_type`` is not recognised.
        """
        # Non-strings cannot be normalised; map them to a key that is never
        # registered so they take the same fallback path as unknown names.
        key = format_type.strip().lower() if isinstance(format_type, str) else None
        adapter = cls._adapters.get(key)
        # Compare against None explicitly so an adapter object that happens
        # to be falsy is still returned.
        if adapter is None:
            logger.warning(
                "Unknown output format '%s', defaulting to JSON. Available: %s",
                format_type,
                list(cls._adapters),
            )
            return cls._adapters["json"]
        return adapter

    @classmethod
    def transform(cls, source: Source, format_type: str) -> Any:
        """Transform a single source by delegating to the adapter's ``transform``."""
        return cls.get_adapter(format_type).transform(source)

    @classmethod
    def transform_many(cls, sources: list, format_type: str) -> list:
        """Transform multiple sources by delegating to the adapter's ``transform_many``."""
        return cls.get_adapter(format_type).transform_many(sources)

    @classmethod
    def available_formats(cls) -> list:
        """Return a new list of the canonical format names (no aliases)."""
        return list(cls._canonical_formats)