Spaces:
Sleeping
Sleeping
File size: 6,109 Bytes
6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 6dfc718 1fed057 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import io
import os
import base64
from io import BytesIO
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
from src.utils.constants import Constants
from src.utils.logging import get_logger
class ImageProcessor:
    """
    Class to handle image processing and description using OpenAI.

    Images may be supplied as a file path, raw encoded bytes, or a PIL
    Image object. Every public method works on a private copy, so an image
    object passed in by the caller is never modified.
    """

    # Bounding box applied before an image is sent for description (the
    # original code notes this as OpenAI's recommended max size).
    _DESCRIBE_MAX_SIZE = (1024, 1024)

    # Modes assumed to be writable as JPEG by Pillow without conversion.
    # Anything else (e.g. RGBA, LA, P) is converted to RGB before encoding.
    _JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

    def __init__(self):
        """
        Initialize the image processor with OpenAI client.

        Loads environment variables via ``load_dotenv`` and reads
        ``OPENAI_API_KEY`` from the environment.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not set.
            Exception: Any error raised while constructing the OpenAI client
                is logged and re-raised unchanged.
        """
        self.logger = get_logger()
        self.logger.info("Initializing image processor")
        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            self.logger.error("OPENAI_API_KEY environment variable is not set")
            raise ValueError("OPENAI_API_KEY environment variable is not set")
        try:
            self.client = OpenAI(api_key=self.openai_api_key)
            self.logger.info("OpenAI client initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize OpenAI client: {e}")
            raise

    def _load_image(self, image_data, bytes_log_message):
        """
        Return a PIL image that is independent of the caller's data.

        Args:
            image_data: A file path (str or os.PathLike), a PIL Image object,
                or the encoded image bytes.
            bytes_log_message (str): Debug message to log when ``image_data``
                is treated as raw bytes.

        Returns:
            PIL.Image.Image: An image that is safe to modify in place.
        """
        if isinstance(image_data, (str, os.PathLike)):
            self.logger.debug(f"Loading image from file path: {image_data}")
            # Read the whole file up front so no file handle outlives this
            # call; Image.open on a path would keep it open lazily.
            with open(image_data, "rb") as image_file:
                return Image.open(BytesIO(image_file.read()))
        if isinstance(image_data, Image.Image):
            self.logger.debug("Using provided PIL Image object")
            # thumbnail() works in place; copy so the caller's image is
            # left untouched.
            return image_data.copy()
        self.logger.debug(bytes_log_message)
        return Image.open(BytesIO(image_data))

    @classmethod
    def _to_jpeg_mode(cls, image):
        """Return ``image`` in a mode that can be saved as JPEG."""
        if image.mode in cls._JPEG_MODES:
            return image
        # JPEG has no alpha channel or palette; converting to RGB drops the
        # transparency information instead of failing at save time.
        return image.convert("RGB")

    @staticmethod
    def _encode_jpeg(image):
        """Serialize ``image`` to JPEG and return the encoded bytes."""
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return buffered.getvalue()

    def describe_image(self, image_data):
        """
        Generate a description of the image using OpenAI.

        The image is shrunk to fit within 1024x1024 (aspect ratio preserved),
        encoded as JPEG, and sent as a base64 data URL together with
        ``Constants.IMAGE_DESCRIPTION_PROMPT``.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.

        Returns:
            str: Description of the image. Empty string when no image data
            is provided or when the model returns no text content.

        Raises:
            Exception: Any error while loading the image or calling the API
                is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided, returning empty description")
            return ""
        self.logger.info("Processing image for description")
        try:
            image = self._load_image(image_data, "Using provided image bytes")
            # Convert before resizing so palette images are resampled as RGB.
            image = self._to_jpeg_mode(image)
            original_size = image.size
            image.thumbnail(self._DESCRIBE_MAX_SIZE, Image.LANCZOS)
            if original_size != image.size:
                self.logger.debug(f"Resized image from {original_size} to {image.size}")

            # Convert to base64
            base64_image = base64.b64encode(self._encode_jpeg(image)).decode("utf-8")
            self.logger.debug("Image converted to base64 for API call")

            # Call OpenAI API
            self.logger.info(f"Calling OpenAI API with model: {Constants.MODEL}")
            response = self.client.chat.completions.create(
                model=Constants.MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that describes images."},
                    {"role": "user", "content": [
                        {"type": "text", "text": Constants.IMAGE_DESCRIPTION_PROMPT},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}},
                    ]},
                ],
                max_tokens=300,
            )
            # The message content may be None (no text returned); treat that
            # as an empty description rather than failing on .strip().
            description = (response.choices[0].message.content or "").strip()
            description_preview = (
                description[:50] + "..." if len(description) > 50 else description
            )
            self.logger.info(f"Image description generated: {description_preview}")
            return description
        except Exception as e:
            self.logger.error(f"Error describing image: {e}")
            raise

    def resize_image(self, image_data, max_width=300, max_height=300):
        """
        Resize an image to fit within the specified dimensions while maintaining aspect ratio.

        Images already within the bounds are not enlarged. The result is
        always JPEG-encoded.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.
            max_width (int, optional): Maximum width of the resized image. Defaults to 300.
            max_height (int, optional): Maximum height of the resized image. Defaults to 300.

        Returns:
            bytes: The resized image data as JPEG, or None when no image
            data is provided.

        Raises:
            Exception: Any error while loading, resizing or encoding the
                image is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided for resizing, returning None")
            return None
        self.logger.info(f"Resizing image to max dimensions: {max_width}x{max_height}")
        try:
            image = self._load_image(image_data, "Loading image from bytes")
            # Convert before resizing so palette images are resampled as RGB.
            image = self._to_jpeg_mode(image)
            original_size = image.size
            self.logger.debug(f"Original image size: {original_size}")

            # Resize image
            image.thumbnail((max_width, max_height), Image.LANCZOS)
            new_size = image.size
            self.logger.debug(f"Resized image to: {new_size}")

            # Convert back to bytes
            result = self._encode_jpeg(image)
            self.logger.info(f"Image resized successfully from {original_size} to {new_size}")
            return result
        except Exception as e:
            self.logger.error(f"Error resizing image: {e}")
            raise
|