Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| from .providers import BaseConverter | |
| from .providers.docling import DoclingConverter | |
| from .providers.kreuzberg_converter import KreuzbergConverter | |
| from .providers.markdown_converter import MarkdownConverter | |
| logger = logging.getLogger(__name__) | |
class ConvertersFactory:
    """Choose which document converters to try for a given content type.

    The choice is driven by two environment variables that are read on
    every call:

    * ``KG_PDF_CONVERTER`` -- preferred converter: ``"docling"`` (the
      default), ``"kreuzberg"`` or ``"auto"``. Compared case-insensitively.
    * ``KG_PDF_FALLBACK`` -- when set to ``1``, ``true``, ``yes`` or ``on``
      (case-insensitive), the non-preferred converter is also offered,
      after the preferred one.
    """

    @staticmethod
    def get_converters(
        content_type: str, embedding_model: str
    ) -> list[BaseConverter]:
        """Return the converters to try for *content_type*, in priority order.

        If ``MarkdownConverter`` supports the content type it is returned
        alone and the environment settings are not consulted for ordering.
        Otherwise the preferred converter comes first, optionally followed
        by the other one when fallback is enabled. A converter is only
        included when its ``supports_content_type`` check passes.

        Args:
            content_type: Content type of the document to convert; passed to
                each converter's ``supports_content_type`` and to the
                ``KreuzbergConverter`` constructor.
            embedding_model: Passed to the ``DoclingConverter`` constructor.

        Returns:
            A non-empty list of converter instances.

        Raises:
            ValueError: If no selected converter supports *content_type*.
                With fallback disabled this is raised as soon as the
                preferred converter rejects the type, even if the other
                converter would have accepted it.
        """
        # Strip so values such as " kreuzberg" or "true " (easy to produce in
        # .env files and shell exports) are not silently misread.
        preferred = os.getenv("KG_PDF_CONVERTER", "docling").strip().lower()
        fallback_enabled = os.getenv(
            "KG_PDF_FALLBACK", "false"
        ).strip().lower() in {"1", "true", "yes", "on"}

        # Markdown input short-circuits the docling/kreuzberg selection.
        if MarkdownConverter.supports_content_type(content_type):
            return [MarkdownConverter()]

        # Each candidate pairs the class (for the support check) with a
        # zero-argument builder, since the two constructors take different
        # arguments.
        docling = (DoclingConverter, lambda: DoclingConverter(embedding_model))
        kreuzberg = (
            KreuzbergConverter,
            lambda: KreuzbergConverter(content_type),
        )

        if preferred == "kreuzberg":
            candidates = [kreuzberg, docling]
        else:
            # "docling" and "auto" share the docling-first ordering. Any
            # other value also falls back to it, but is reported instead of
            # being accepted silently.
            if preferred not in {"docling", "auto"}:
                logger.warning(
                    "Unknown KG_PDF_CONVERTER value %r; using 'docling'",
                    preferred,
                )
            candidates = [docling, kreuzberg]

        if not fallback_enabled:
            # Without fallback only the preferred converter is considered.
            candidates = candidates[:1]

        converters: list[BaseConverter] = [
            build()
            for converter_cls, build in candidates
            if converter_cls.supports_content_type(content_type)
        ]

        if not converters:
            raise ValueError(f"Unsupported content type: {content_type}")
        return converters