# adf-chatbot2/src/image/processor.py
# Yannick Lemin
# fixed search
# 6dfc718
import io
import os
import base64
from io import BytesIO
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
from src.utils.constants import Constants
from src.utils.logging import get_logger
class ImageProcessor:
    """
    Class to handle image processing and description using OpenAI.

    Images can be supplied as a file path, as encoded image bytes, or as a
    PIL Image object. Everything this class emits is JPEG-encoded.
    """

    # Bounding box applied before an image is sent to the API
    # (OpenAI's recommended max size).
    _API_MAX_SIZE = (1024, 1024)

    # Pillow modes the JPEG writer accepts directly; any other mode has to be
    # converted first, otherwise Image.save() raises OSError.
    _JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

    def __init__(self):
        """
        Initialize the image processor with OpenAI client.

        Raises:
            ValueError: If the OPENAI_API_KEY environment variable is not set.
            Exception: Whatever the OpenAI client constructor raises; it is
                logged and re-raised unchanged.
        """
        self.logger = get_logger()
        self.logger.info("Initializing image processor")
        # Pick up variables from a local .env file, if there is one.
        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            self.logger.error("OPENAI_API_KEY environment variable is not set")
            raise ValueError("OPENAI_API_KEY environment variable is not set")
        try:
            self.client = OpenAI(api_key=self.openai_api_key)
            self.logger.info("OpenAI client initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize OpenAI client: {str(e)}")
            raise

    def _load_image(self, image_data, bytes_message):
        """
        Load image_data into an independent, fully decoded PIL Image.

        The result is always a copy: the caller's own Image object is never
        touched by the in-place thumbnail() that follows, and any file opened
        here is closed before returning.

        Args:
            image_data: A file path (str or os.PathLike), a PIL Image object,
                or encoded image bytes.
            bytes_message (str): Debug message to log when image_data is
                treated as bytes.

        Returns:
            PIL.Image.Image: A detached in-memory copy of the image.
        """
        if isinstance(image_data, Image.Image):
            self.logger.debug("Using provided PIL Image object")
            return image_data.copy()
        if isinstance(image_data, (str, os.PathLike)):
            self.logger.debug(f"Loading image from file path: {image_data}")
            # copy() forces the pixel data to be read, so the file handle can
            # be released as soon as the with-block exits.
            with Image.open(image_data) as opened:
                return opened.copy()
        # Anything else is assumed to be encoded image bytes.
        self.logger.debug(bytes_message)
        with Image.open(BytesIO(image_data)) as opened:
            return opened.copy()

    def _encode_jpeg(self, image):
        """
        Encode a PIL Image as JPEG bytes.

        JPEG has no alpha channel and no palette support, so images in such
        modes (typical for PNG and GIF input) are converted first: transparent
        images are flattened onto a white background, everything else is
        converted to RGB.

        Args:
            image: The PIL Image object to encode.

        Returns:
            bytes: The JPEG-encoded image data.
        """
        if image.mode not in self._JPEG_MODES:
            has_alpha = "A" in image.getbands() or "transparency" in image.info
            if has_alpha:
                rgba = image.convert("RGBA")
                flattened = Image.new("RGB", rgba.size, (255, 255, 255))
                flattened.paste(rgba, mask=rgba.getchannel("A"))
                image = flattened
            else:
                image = image.convert("RGB")
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return buffered.getvalue()

    def describe_image(self, image_data):
        """
        Generate a description of the image using OpenAI.

        The image is shrunk to fit within 1024x1024 (aspect ratio preserved),
        encoded as JPEG and sent inline as a base64 data URL.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.

        Returns:
            str: Description of the image. Empty string when no image data is
                provided or when the API response carries no text content.

        Raises:
            Exception: Any error raised while loading, encoding or calling the
                API is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided, returning empty description")
            return ""
        self.logger.info("Processing image for description")
        try:
            image = self._load_image(image_data, "Using provided image bytes")
            original_size = image.size
            # thumbnail() only ever shrinks; smaller images are left as they are.
            image.thumbnail(self._API_MAX_SIZE, Image.LANCZOS)
            if original_size != image.size:
                self.logger.debug(f"Resized image from {original_size} to {image.size}")
            # Convert to base64
            base64_image = base64.b64encode(self._encode_jpeg(image)).decode("utf-8")
            self.logger.debug("Image converted to base64 for API call")
            # Call OpenAI API
            self.logger.info(f"Calling OpenAI API with model: {Constants.MODEL}")
            response = self.client.chat.completions.create(
                model=Constants.MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that describes images."},
                    {"role": "user", "content": [
                        {"type": "text", "text": Constants.IMAGE_DESCRIPTION_PROMPT},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                    ]},
                ],
                max_tokens=300,
            )
            # The message content may be None (e.g. on a refusal); normalise it
            # so the documented str return type always holds.
            content = response.choices[0].message.content
            description = (content or "").strip()
            description_preview = description[:50] + "..." if len(description) > 50 else description
            self.logger.info(f"Image description generated: {description_preview}")
            return description
        except Exception as e:
            self.logger.error(f"Error describing image: {str(e)}")
            raise

    def resize_image(self, image_data, max_width=300, max_height=300):
        """
        Resize an image to fit within the specified dimensions while maintaining aspect ratio.

        The input is never modified; the resized result is returned as new
        JPEG-encoded bytes. Images already within the bounds are not enlarged.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.
            max_width (int, optional): Maximum width of the resized image. Defaults to 300.
            max_height (int, optional): Maximum height of the resized image. Defaults to 300.

        Returns:
            bytes: The resized image data (JPEG), or None when no image data
                is provided.

        Raises:
            Exception: Any error raised while loading or encoding the image is
                logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided for resizing, returning None")
            return None
        self.logger.info(f"Resizing image to max dimensions: {max_width}x{max_height}")
        try:
            image = self._load_image(image_data, "Loading image from bytes")
            original_size = image.size
            self.logger.debug(f"Original image size: {original_size}")
            # Resize image
            image.thumbnail((max_width, max_height), Image.LANCZOS)
            new_size = image.size
            self.logger.debug(f"Resized image to: {new_size}")
            # Convert back to bytes
            result = self._encode_jpeg(image)
            self.logger.info(f"Image resized successfully from {original_size} to {new_size}")
            return result
        except Exception as e:
            self.logger.error(f"Error resizing image: {str(e)}")
            raise