carolinacon's picture
Refactoring and started filling in the README file
b4f9800
from typing import Literal, Optional
import mimetypes
import base64
from pathlib import Path
def get_content_type(mime_type: Optional[str]) -> Optional[Literal["image", "audio", "file", "text"]]:
    """Map a MIME type string to a coarse attachment content type.

    Args:
        mime_type: A MIME type such as ``"image/png"``. May be empty or
            ``None`` (e.g. when the type could not be guessed).

    Returns:
        ``"image"``, ``"audio"``, ``"file"`` or ``"text"``, or ``None`` when
        ``mime_type`` is empty or ``None``.

    Raises:
        ValueError: If the MIME type maps to none of the known content types.
    """
    if not mime_type:
        return None

    # MIME types are case-insensitive, so normalise once and use the same
    # normalised value for both the main-type and the exact-match lookup.
    normalized = mime_type.strip().lower()

    # The main type is the part before the slash ("image/png" -> "image").
    main_type = normalized.partition("/")[0]

    if main_type in ("image", "audio", "file", "text"):
        return main_type
    # Excel workbooks have an "application/..." MIME type but are handled as
    # generic files.
    if normalized == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        return "file"
    raise ValueError(f"Cannot extract type from mime_type {mime_type}")
class Attachment:
    """Raw file bytes together with the MIME/content type guessed from its path.

    The MIME type is guessed from ``file_path`` alone (the bytes are not
    inspected), and the content type is derived from that MIME type via
    ``get_content_type``.
    """

    # Raw bytes of the attachment.
    content: bytes
    # MIME type guessed from ``file_path``; None when it cannot be guessed.
    mime_type: Optional[str]
    # Path (or file name) the attachment was loaded from.
    file_path: str
    # Coarse content type returned by ``get_content_type``; None when
    # ``mime_type`` is None.
    type: Optional[str]

    def __init__(self, content: bytes, file_path: str):
        """Store the bytes and derive ``mime_type`` and ``type`` from the path.

        Args:
            content: Raw bytes of the attachment.
            file_path: Path or file name used to guess the MIME type.

        Raises:
            Exception: Propagated from ``get_content_type`` when the guessed
                MIME type cannot be mapped to a content type.
        """
        self.content = content
        self.file_path = file_path
        # guess_type returns a (type, encoding) pair; only the type is kept.
        self.mime_type = mimetypes.guess_type(file_path)[0]
        self.type = get_content_type(self.mime_type)

    def get_encoded_content_b64(self) -> str:
        """Return ``content`` encoded as a base64 text string."""
        return base64.b64encode(self.content).decode("utf-8")
class AttachmentHandler:
    """Builds message-content dicts for attachments and loads attachment bytes."""

    def __init__(self, supported_types: list):
        """Remember which attachment types this handler accepts.

        Args:
            supported_types: Attachment type names that pass the check in
                ``get_representation``.
        """
        self.supported_types = supported_types

    def get_representation(self, type: str, content: bytes, format: str, mime_type: str) -> dict:
        """Build the content dict for one attachment.

        Args:
            type: Attachment type; must be listed in ``supported_types``.
            content: Raw attachment bytes; embedded base64-encoded.
            format: Audio format label, used only for ``"audio"``.
            mime_type: MIME type placed in the data URL, used only for
                ``"image"``.

        Returns:
            For ``"audio"`` an ``input_audio`` dict carrying the base64 data
            and ``format``; for ``"image"`` an ``image_url`` dict carrying a
            ``data:`` URL.

        Raises:
            Exception: If ``type`` is not in ``supported_types``, or is
                supported but is neither ``"audio"`` nor ``"image"``.
        """
        if type not in self.supported_types:
            raise Exception(f"Invalid attachment type {type}")

        base64_content = base64.b64encode(content).decode("utf-8")

        if type == "audio":
            return {
                "type": "input_audio",
                "input_audio": {"data": base64_content, "format": format},
            }
        if type == "image":
            return {
                "type": "image_url",
                "image_url": {"url": f"data:{mime_type};base64," + base64_content},
            }
        # A type can be listed as supported and still have no representation.
        raise Exception(f"Cannot extract a representation for type {type}")

    def fetch_file_from_reference(self, file_reference: str) -> bytes:
        """Read the bytes of the local file named by ``file_reference``.

        References starting with ``"/"`` or ``"./"`` are always treated as
        local paths and read without an existence check; any other reference
        is read only if it already exists on disk.

        Args:
            file_reference: Local file path.

        Returns:
            The file's contents.

        Raises:
            ValueError: If the reference has neither prefix and does not
                exist on disk.
            OSError: From ``Path.read_bytes`` (e.g. ``FileNotFoundError`` for a
                missing ``"/"``- or ``"./"``-prefixed path).
        """
        file = Path(file_reference)
        if file_reference.startswith(("/", "./")) or file.exists():
            return file.read_bytes()
        raise ValueError(
            f"Could not resolve file reference: {file_reference}. Implement 'fetch_file_from_reference' for your "
            f"storage system.")
# Attachment type names accepted by the module-level handler below.
supported_types = [
    "image",
    "audio",
    "file",
    "text",
]

# Module-level handler instance built over the list above.
attachmentHandler = AttachmentHandler(supported_types)