Spaces:
Runtime error
Runtime error
| from typing import List | |
| from langchain.docstore.document import Document | |
| from langchain.document_loaders.azure_blob_storage_file import ( | |
| AzureBlobStorageFileLoader, | |
| ) | |
| from langchain.document_loaders.base import BaseLoader | |
class AzureBlobStorageContainerLoader(BaseLoader):
    """Load from `Azure Blob Storage` container.

    Every blob in the container whose name starts with ``prefix`` is loaded
    through an ``AzureBlobStorageFileLoader`` and the resulting documents are
    concatenated into a single list.
    """

    def __init__(self, conn_str: str, container: str, prefix: str = ""):
        """Initialize with connection string, container and blob prefix.

        Args:
            conn_str: Connection string for Azure Blob Storage.
            container: Name of the container to read blobs from.
            prefix: Forwarded as ``name_starts_with`` when listing blobs.
                Defaults to the empty string.
        """
        self.conn_str = conn_str
        """Connection string for Azure Blob Storage."""
        self.container = container
        """Container name."""
        self.prefix = prefix
        """Prefix for blob names."""

    def load(self) -> List[Document]:
        """Load documents.

        Returns:
            The documents produced by loading each listed blob, in the order
            the blobs are returned by the listing.

        Raises:
            ImportError: If the ``azure-storage-blob`` package is not
                installed.
        """
        # Imported lazily so the dependency is only needed when load() runs.
        try:
            from azure.storage.blob import ContainerClient
        except ImportError as exc:
            raise ImportError(
                "Could not import azure storage blob python package. "
                "Please install it with `pip install azure-storage-blob`."
            ) from exc

        docs: List[Document] = []
        # Use the client as a context manager so its underlying connections
        # are released even if listing or loading a blob raises.
        with ContainerClient.from_connection_string(
            conn_str=self.conn_str, container_name=self.container
        ) as container:
            # The listing is consumed inside the ``with`` block because it is
            # iterated while the client is still open.
            for blob in container.list_blobs(name_starts_with=self.prefix):
                loader = AzureBlobStorageFileLoader(
                    self.conn_str,
                    self.container,
                    blob.name,  # type: ignore
                )
                docs.extend(loader.load())
        return docs