import base64
import io
import os
from io import BytesIO

from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image

from src.utils.constants import Constants
from src.utils.logging import get_logger


class ImageProcessor:
    """
    Class to handle image processing and description using OpenAI.
    """

    # Bounding box applied to images before they are sent for description
    # (the original code noted this as OpenAI's recommended max size).
    DESCRIPTION_MAX_SIZE = (1024, 1024)
    # Upper bound on the number of tokens requested for a description.
    DESCRIPTION_MAX_TOKENS = 300
    # Image modes that are passed to the JPEG encoder unchanged; every other
    # mode (e.g. RGBA, LA, P) is converted to RGB before saving.
    _JPEG_MODES = ("L", "RGB", "CMYK")

    def __init__(self):
        """
        Initialize the image processor with OpenAI client.

        Loads environment variables via ``load_dotenv()`` and reads
        ``OPENAI_API_KEY`` from the environment.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not set (or is empty).
            Exception: Any error raised while constructing the OpenAI client
                is logged and re-raised unchanged.
        """
        self.logger = get_logger()
        self.logger.info("Initializing image processor")

        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            self.logger.error("OPENAI_API_KEY environment variable is not set")
            raise ValueError("OPENAI_API_KEY environment variable is not set")

        try:
            self.client = OpenAI(api_key=self.openai_api_key)
            self.logger.info("OpenAI client initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize OpenAI client: {str(e)}")
            raise

    def _load_image(self, image_data, bytes_message):
        """
        Turn a file path, a PIL Image, or raw bytes into a PIL Image.

        Args:
            image_data: A file path (``str`` or ``os.PathLike``), a PIL Image
                object, or a bytes-like object holding encoded image data.
            bytes_message (str): Debug message to log when ``image_data`` is
                treated as raw bytes.

        Returns:
            PIL.Image.Image: An image that is safe to modify in place. It
            never aliases a caller-supplied PIL Image and never keeps a file
            handle open.
        """
        if isinstance(image_data, Image.Image):
            self.logger.debug("Using provided PIL Image object")
            # thumbnail() works in place; copy so the caller's image is
            # not silently shrunk.
            return image_data.copy()

        if isinstance(image_data, (str, os.PathLike)):
            self.logger.debug(f"Loading image from file path: {image_data}")
            # Read the whole file up front so the handle is closed right away
            # instead of being kept open by PIL's lazy loading.
            with open(image_data, "rb") as image_file:
                image_data = image_file.read()
        else:
            self.logger.debug(bytes_message)

        return Image.open(BytesIO(image_data))

    @classmethod
    def _encode_jpeg(cls, image):
        """
        Encode a PIL Image as JPEG and return the encoded bytes.

        Images whose mode is not listed in ``_JPEG_MODES`` are converted to
        RGB first, because saving such modes as JPEG raises an error. Any
        alpha channel is dropped by that conversion.
        """
        if image.mode not in cls._JPEG_MODES:
            image = image.convert("RGB")
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return buffered.getvalue()

    def describe_image(self, image_data):
        """
        Generate a description of the image using OpenAI.

        The image is downscaled to fit within ``DESCRIPTION_MAX_SIZE``,
        encoded as a base64 JPEG data URL and sent together with
        ``Constants.IMAGE_DESCRIPTION_PROMPT`` to the chat completions API
        using ``Constants.MODEL``.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL
                Image object. A PIL Image passed in is not modified.

        Returns:
            str: Description of the image, stripped of surrounding
            whitespace. An empty string is returned when ``image_data`` is
            falsy or when the response carries no text content.

        Raises:
            Exception: Any error raised while loading, encoding or describing
                the image is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided, returning empty description")
            return ""

        self.logger.info("Processing image for description")

        try:
            image = self._load_image(image_data, "Using provided image bytes")

            original_size = image.size
            image.thumbnail(self.DESCRIPTION_MAX_SIZE, Image.LANCZOS)
            if original_size != image.size:
                self.logger.debug(f"Resized image from {original_size} to {image.size}")

            # Convert to base64
            base64_image = base64.b64encode(self._encode_jpeg(image)).decode("utf-8")
            self.logger.debug("Image converted to base64 for API call")

            # Call OpenAI API
            self.logger.info(f"Calling OpenAI API with model: {Constants.MODEL}")
            response = self.client.chat.completions.create(
                model=Constants.MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that describes images."},
                    {"role": "user", "content": [
                        {"type": "text", "text": Constants.IMAGE_DESCRIPTION_PROMPT},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                    ]}
                ],
                max_tokens=self.DESCRIPTION_MAX_TOKENS
            )

            # The message content may be None; treat that as an empty
            # description instead of failing on .strip().
            content = response.choices[0].message.content
            description = (content or "").strip()
            description_preview = description[:50] + "..." if len(description) > 50 else description
            self.logger.info(f"Image description generated: {description_preview}")
            return description
        except Exception as e:
            self.logger.error(f"Error describing image: {str(e)}")
            raise

    def resize_image(self, image_data, max_width=300, max_height=300):
        """
        Resize an image to fit within the specified dimensions while maintaining aspect ratio.

        Images already smaller than the bounding box are not enlarged
        (``Image.thumbnail`` only ever shrinks); they are still re-encoded.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL
                Image object. A PIL Image passed in is not modified.
            max_width (int, optional): Maximum width of the resized image. Defaults to 300.
            max_height (int, optional): Maximum height of the resized image. Defaults to 300.

        Returns:
            bytes: The resized image encoded as JPEG, or ``None`` when
            ``image_data`` is falsy.

        Raises:
            Exception: Any error raised while loading, resizing or encoding
                the image is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided for resizing, returning None")
            return None

        self.logger.info(f"Resizing image to max dimensions: {max_width}x{max_height}")

        try:
            image = self._load_image(image_data, "Loading image from bytes")

            original_size = image.size
            self.logger.debug(f"Original image size: {original_size}")

            # Resize image
            image.thumbnail((max_width, max_height), Image.LANCZOS)
            new_size = image.size
            self.logger.debug(f"Resized image to: {new_size}")

            # Convert back to bytes
            result = self._encode_jpeg(image)

            self.logger.info(f"Image resized successfully from {original_size} to {new_size}")
            return result
        except Exception as e:
            self.logger.error(f"Error resizing image: {str(e)}")
            raise