Knowledge-Universe / src /format_adapters /adapter_factory.py
vlsiddarth's picture
Commit latest version with ranking logic and API fixes
0733aae
"""
Knowledge Universe — Format Adapter Factory
Central registry for all output format adapters.
Supported output formats:
    json       → standard JSON dict (default)
    html       → rich HTML cards with format-aware rendering
    pdf        → PDF manifest for document processing pipelines
    streaming  → streaming manifest for video/audio
    embeddings → vector embeddings for RAG (384-dim, all-MiniLM-L6-v2)
"""
import logging
from typing import Any
from src.api.models import Source
from src.format_adapters.json_adapter import JSONFormatAdapter
from src.format_adapters.html_adapter import HTMLFormatAdapter
from src.format_adapters.pdf_adapter import PDFManifestAdapter
from src.format_adapters.streaming_adapter import StreamingAdapter
from src.format_adapters.embedding_adapter import EmbeddingAdapter
logger = logging.getLogger(__name__)
class FormatAdapterFactory:
    """
    Factory for all output format adapters.

    Single entry point: FormatAdapterFactory.get_adapter("json")

    Every adapter is instantiated once, when this class body executes, and
    the same instances are handed out to all callers. Alias names resolve to
    the very same instance as their canonical name.
    """

    # Registry: output format name → adapter instance.
    # Canonical names are inserted first so that key order (used in the
    # "Available" part of the warning below) stays canonical-then-aliases.
    _adapters = {
        "json": JSONFormatAdapter(),
        "html": HTMLFormatAdapter(),
        "pdf": PDFManifestAdapter(),
        "streaming": StreamingAdapter(),
        "embeddings": EmbeddingAdapter(),
    }

    # Snapshot of the canonical names, taken before aliases are added, so
    # available_formats() can never drift from the registry.
    _canonical_formats = tuple(_adapters)

    # Aliases reuse the canonical instance instead of constructing a second
    # adapter. NOTE(review): this assumes adapters keep no per-name state;
    # confirm against the adapter implementations.
    _adapters["embed"] = _adapters["embeddings"]
    _adapters["manifest"] = _adapters["pdf"]
    _adapters["stream"] = _adapters["streaming"]

    @classmethod
    def get_adapter(cls, format_type: str) -> Any:
        """
        Get adapter for output format.

        Lookup is case-insensitive and ignores surrounding whitespace.
        Unknown names, and values that are not strings (e.g. ``None``),
        log a warning and fall back to the JSON adapter rather than raising.

        Args:
            format_type: "json" | "html" | "pdf" | "streaming" | "embeddings",
                or one of the aliases "embed" | "manifest" | "stream".

        Returns:
            Adapter instance with .transform(source) method.
        """
        adapter = None
        if isinstance(format_type, str):
            adapter = cls._adapters.get(format_type.strip().lower())

        # Compare against None explicitly: a registered adapter must never be
        # discarded just because it happens to evaluate as falsy.
        if adapter is None:
            logger.warning(
                "Unknown output format '%s', defaulting to JSON. "
                "Available: %s",
                format_type,
                list(cls._adapters),
            )
            return cls._adapters["json"]
        return adapter

    @classmethod
    def transform(cls, source: Source, format_type: str) -> Any:
        """Transform a single source to the specified output format."""
        return cls.get_adapter(format_type).transform(source)

    @classmethod
    def transform_many(cls, sources: list, format_type: str) -> list:
        """
        Transform multiple sources to the specified output format.

        Delegates to the selected adapter's ``transform_many``; any batch
        optimization is up to that adapter.
        """
        return cls.get_adapter(format_type).transform_many(sources)

    @classmethod
    def available_formats(cls) -> list:
        """Return list of unique adapter format names (no aliases)."""
        # Fresh list on every call so callers cannot mutate shared state.
        return list(cls._canonical_formats)