"""Attachment helpers: MIME classification, base64 encoding, local file loading."""

from typing import Literal, Optional
import mimetypes
import base64
from pathlib import Path

# Main MIME types that are used directly as the attachment content type.
_DIRECT_CONTENT_TYPES = frozenset({"image", "audio", "file", "text"})

# The only "application/..." MIME type that is currently mapped (to "file").
_XLSX_MIME_TYPE = (
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
)


def get_content_type(
    mime_type: Optional[str],
) -> Optional[Literal["image", "audio", "file", "text"]]:
    """Return the attachment content type for a MIME type string.

    Args:
        mime_type: A MIME type such as ``"image/png"``; may be empty or None.

    Returns:
        ``None`` when *mime_type* is empty or None; otherwise the lower-cased
        main type if it is one of "image", "audio", "file" or "text", or
        "file" for the XLSX spreadsheet MIME type.

    Raises:
        ValueError: If the MIME type cannot be mapped to a content type.
    """
    if not mime_type:
        return None

    # "image/png" -> "image"; the main type is matched case-insensitively.
    main_type = mime_type.split("/")[0].lower()
    if main_type in _DIRECT_CONTENT_TYPES:
        return main_type

    # The spreadsheet MIME type is compared exactly (case-sensitive).
    if mime_type == _XLSX_MIME_TYPE:
        return "file"

    raise ValueError(f"Cannot extract type from mime_type {mime_type}")


class Attachment:
    """Raw file content plus the MIME type and content type guessed from its path."""

    content: bytes
    # None when the MIME type cannot be guessed from the file path.
    mime_type: Optional[str]
    file_path: str
    # None when mime_type is None (see get_content_type).
    type: Optional[str]

    def __init__(self, content: bytes, file_path: str):
        """Store *content* and derive ``mime_type`` / ``type`` from *file_path*.

        Raises:
            ValueError: If the guessed MIME type has no content-type mapping.
        """
        self.content = content
        self.file_path = file_path
        # guess_type returns (type, encoding); only the type is kept.
        self.mime_type = mimetypes.guess_type(file_path)[0]
        self.type = get_content_type(self.mime_type)

    def get_encoded_content_b64(self) -> str:
        """Return the content base64-encoded as a text string."""
        return base64.b64encode(self.content).decode("utf-8")


class AttachmentHandler:
    """Builds message representations for attachments and loads local files."""

    def __init__(self, supported_types: list):
        """Remember the content types this handler accepts."""
        self.supported_types = supported_types

    # NOTE: `type` and `format` shadow builtins, but they are part of the
    # public signature (callers may pass them by keyword), so they are kept.
    def get_representation(
        self, type: str, content: bytes, format: str, mime_type: Optional[str]
    ) -> dict:
        """Return the dict representation of an attachment.

        Args:
            type: Content type; must be in ``self.supported_types``.
            content: Raw bytes, embedded base64-encoded.
            format: Audio format label, used only for ``"audio"``.
            mime_type: MIME type placed in the data URL, used only for
                ``"image"``.

        Returns:
            An ``input_audio`` dict for audio or an ``image_url`` dict (with a
            base64 data URL) for images.

        Raises:
            ValueError: If *type* is not supported, or is supported but has no
                representation (anything other than "audio" / "image").
        """
        if type not in self.supported_types:
            raise ValueError(f"Invalid attachment type: {type}")

        base64_content = base64.b64encode(content).decode("utf-8")

        if type == "audio":
            return {
                "type": "input_audio",
                "input_audio": {"data": base64_content, "format": format},
            }
        if type == "image":
            return {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64," + base64_content,
                },
            }

        raise ValueError(f"Cannot extract a representation for type {type}")

    def fetch_file_from_reference(self, file_reference: str) -> bytes:
        """Return the bytes of the local file that *file_reference* points to.

        A reference starting with ``/`` or ``./`` is always treated as a local
        path (so a missing file surfaces as the OS error from reading it);
        any other reference is read only if it exists on disk.

        Raises:
            ValueError: If the reference is not recognised as a local path.
            OSError: If a path-like reference cannot be read.
        """
        file = Path(file_reference)
        if file_reference.startswith(("/", "./")) or file.exists():
            return file.read_bytes()

        raise ValueError(
            f"Could not resolve file reference: {file_reference}. "
            f"Implement 'fetch_file_from_reference' for your storage system."
        )


supported_types = ["image", "audio", "file", "text"]
attachmentHandler = AttachmentHandler(supported_types)