Spaces:
Sleeping
Sleeping
| import io | |
| import os | |
| import base64 | |
| from io import BytesIO | |
| from PIL import Image | |
| from dotenv import load_dotenv | |
| from openai import OpenAI | |
| from src.utils.constants import Constants | |
| from src.utils.logging import get_logger | |
class ImageProcessor:
    """
    Class to handle image processing and description using OpenAI.

    Both public methods accept the image as a file path (``str`` or
    ``os.PathLike``), raw encoded bytes, or a PIL ``Image`` object, and never
    modify an ``Image`` object supplied by the caller.
    """

    # Bounding box applied before an image is sent for description.
    # NOTE(review): the original comment called this "OpenAI's recommended
    # max size" -- confirm against the current API guidance.
    _DESCRIBE_MAX_SIZE = (1024, 1024)

    # Modes Pillow's JPEG writer encodes directly; anything else (RGBA, LA,
    # P, ...) has to be converted first or ``Image.save`` raises OSError.
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self):
        """
        Initialize the image processor with OpenAI client.

        Loads environment variables via ``load_dotenv()`` and reads
        ``OPENAI_API_KEY`` from the environment.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
            Exception: Any error raised while constructing the OpenAI client
                is logged and re-raised unchanged.
        """
        self.logger = get_logger()
        self.logger.info("Initializing image processor")

        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            self.logger.error("OPENAI_API_KEY environment variable is not set")
            raise ValueError("OPENAI_API_KEY environment variable is not set")

        try:
            self.client = OpenAI(api_key=self.openai_api_key)
            self.logger.info("OpenAI client initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize OpenAI client: {str(e)}")
            raise

    def _load_image(self, image_data):
        """
        Return a PIL Image that is safe to modify, built from ``image_data``.

        Args:
            image_data: A file path (``str`` or ``os.PathLike``), encoded image
                bytes, or a PIL Image object.

        Returns:
            PIL.Image.Image: A copy when a PIL Image was supplied; otherwise an
            image opened from an in-memory buffer, so no file handle stays open.
        """
        if isinstance(image_data, Image.Image):
            self.logger.debug("Using provided PIL Image object")
            # thumbnail() resizes in place; work on a copy so the caller's
            # object keeps its original size.
            return image_data.copy()

        if isinstance(image_data, (str, os.PathLike)):
            self.logger.debug(f"Loading image from file path: {image_data}")
            # Read the whole file up front: Image.open() on a path is lazy
            # and would keep the file handle open.
            with open(image_data, "rb") as image_file:
                image_bytes = image_file.read()
        else:
            self.logger.debug("Loading image from bytes")
            image_bytes = image_data

        return Image.open(BytesIO(image_bytes))

    def _encode_jpeg(self, image):
        """
        Encode ``image`` as JPEG and return the encoded bytes.

        Images whose mode the JPEG writer cannot handle (for example RGBA or
        palette images) are converted to RGB first; transparency is dropped.
        """
        if image.mode not in self._JPEG_MODES:
            self.logger.debug(f"Converting image from mode {image.mode} to RGB for JPEG encoding")
            image = image.convert("RGB")

        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return buffered.getvalue()

    def describe_image(self, image_data):
        """
        Generate a description of the image using OpenAI.

        The image is shrunk to fit within 1024x1024 (aspect ratio preserved),
        encoded as JPEG, and sent as a base64 data URL together with
        ``Constants.IMAGE_DESCRIPTION_PROMPT`` to the ``Constants.MODEL`` model.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.

        Returns:
            str: Description of the image, stripped of surrounding whitespace.
            Empty string when ``image_data`` is falsy or the model returns no
            text content.

        Raises:
            Exception: Any error raised while loading, encoding, or describing
                the image is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided, returning empty description")
            return ""

        self.logger.info("Processing image for description")
        try:
            image = self._load_image(image_data)

            original_size = image.size
            image.thumbnail(self._DESCRIBE_MAX_SIZE, Image.LANCZOS)
            if original_size != image.size:
                self.logger.debug(f"Resized image from {original_size} to {image.size}")

            base64_image = base64.b64encode(self._encode_jpeg(image)).decode("utf-8")
            self.logger.debug("Image converted to base64 for API call")

            self.logger.info(f"Calling OpenAI API with model: {Constants.MODEL}")
            response = self.client.chat.completions.create(
                model=Constants.MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that describes images."},
                    {"role": "user", "content": [
                        {"type": "text", "text": Constants.IMAGE_DESCRIPTION_PROMPT},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                    ]},
                ],
                max_tokens=300,
            )

            # message.content can be None (e.g. a refusal); treat that as an
            # empty description instead of failing on None.strip().
            description = (response.choices[0].message.content or "").strip()

            description_preview = description[:50] + "..." if len(description) > 50 else description
            self.logger.info(f"Image description generated: {description_preview}")
            return description
        except Exception as e:
            self.logger.error(f"Error describing image: {str(e)}")
            raise

    def resize_image(self, image_data, max_width=300, max_height=300):
        """
        Resize an image to fit within the specified dimensions while maintaining aspect ratio.

        Images already within the bounds are not enlarged. The result is
        always re-encoded as JPEG.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.
            max_width (int, optional): Maximum width of the resized image. Defaults to 300.
            max_height (int, optional): Maximum height of the resized image. Defaults to 300.

        Returns:
            bytes: The resized image data encoded as JPEG, or None when
            ``image_data`` is falsy.

        Raises:
            Exception: Any error raised while loading, resizing, or encoding
                the image is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided for resizing, returning None")
            return None

        self.logger.info(f"Resizing image to max dimensions: {max_width}x{max_height}")
        try:
            image = self._load_image(image_data)

            original_size = image.size
            self.logger.debug(f"Original image size: {original_size}")

            image.thumbnail((max_width, max_height), Image.LANCZOS)
            new_size = image.size
            self.logger.debug(f"Resized image to: {new_size}")

            result = self._encode_jpeg(image)
            self.logger.info(f"Image resized successfully from {original_size} to {new_size}")
            return result
        except Exception as e:
            self.logger.error(f"Error resizing image: {str(e)}")
            raise