Spaces:
Build error
Build error
| import requests | |
| import logging, os | |
| from typing import Iterator, List, Union | |
| from langchain_core.document_loaders import BaseLoader | |
| from langchain_core.documents import Document | |
| from open_webui.env import SRC_LOG_LEVELS | |
| log = logging.getLogger(__name__) | |
| log.setLevel(SRC_LOG_LEVELS["RAG"]) | |
class ExternalDocumentLoader(BaseLoader):
    """Load documents by delegating file parsing to an external HTTP service.

    The raw bytes of ``file_path`` are sent with ``PUT {url}/process`` and the
    JSON reply (a single object or a list of objects carrying ``page_content``
    and ``metadata`` keys) is converted into ``Document`` instances.
    """

    def __init__(
        self,
        file_path,
        url: str,
        api_key: str,
        mime_type=None,
        **kwargs,
    ) -> None:
        """Store the request settings; no I/O is performed here.

        Args:
            file_path: Path of the local file whose bytes are uploaded.
            url: Base URL of the service; one trailing ``/`` is stripped
                before ``/process`` is appended.
            api_key: Sent as ``Authorization: Bearer <api_key>``; the header
                is omitted when this is ``None``.
            mime_type: Sent as ``Content-Type``; omitted when ``None``.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        self.url = url
        self.api_key = api_key
        self.file_path = file_path
        self.mime_type = mime_type

    def load(self) -> List[Document]:
        """Upload the file and return the documents produced by the service.

        Returns:
            One ``Document`` for a JSON object reply, or one per element for a
            JSON list reply.

        Raises:
            OSError: If ``file_path`` cannot be opened or read.
            Exception: If the request cannot be sent, the response status is
                not OK, the reply is empty, or the reply has an unexpected
                shape. A body that is not valid JSON propagates whatever
                ``response.json()`` raises.
        """
        with open(self.file_path, "rb") as f:
            data = f.read()

        headers = self._build_headers()

        # Strip a single trailing slash so the joined URL has no "//process".
        base_url = self.url[:-1] if self.url.endswith("/") else self.url

        try:
            response = requests.put(
                f"{base_url}/process", data=data, headers=headers
            )
        except Exception as e:
            log.error(f"Error connecting to endpoint: {e}")
            # Chain the cause so the underlying transport error stays visible.
            raise Exception(f"Error connecting to endpoint: {e}") from e

        if not response.ok:
            raise Exception(
                f"Error loading document: {response.status_code} {response.text}"
            )

        response_data = response.json()
        if not response_data:
            raise Exception("Error loading document: No content returned")

        if isinstance(response_data, dict):
            return [self._to_document(response_data)]
        if isinstance(response_data, list):
            return [self._to_document(item) for item in response_data]

        raise Exception("Error loading document: Unable to parse content")

    def _build_headers(self) -> dict:
        """Assemble the optional request headers from the stored settings."""
        headers = {}
        if self.mime_type is not None:
            headers["Content-Type"] = self.mime_type
        if self.api_key is not None:
            headers["Authorization"] = f"Bearer {self.api_key}"
        try:
            headers["X-Filename"] = os.path.basename(self.file_path)
        except TypeError:
            # Best effort: the file name is only a hint for the service, so a
            # path object that basename() cannot handle (e.g. an integer file
            # descriptor) must not abort the upload.
            pass
        return headers

    @staticmethod
    def _to_document(item) -> Document:
        """Convert one JSON object from the service reply into a Document."""
        if not isinstance(item, dict):
            raise Exception("Error loading document: Unable to parse content")
        return Document(
            page_content=item.get("page_content"),
            # A reply without metadata would otherwise hand None to Document,
            # which expects a mapping.
            metadata=item.get("metadata") or {},
        )