File size: 6,109 Bytes
6dfc718
1fed057
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6dfc718
1fed057
 
 
 
 
 
 
 
 
 
6dfc718
1fed057
 
 
 
 
6dfc718
 
 
 
 
1fed057
6dfc718
1fed057
6dfc718
1fed057
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6dfc718
1fed057
 
 
 
 
 
 
 
 
 
 
 
6dfc718
1fed057
 
 
 
6dfc718
 
 
 
1fed057
6dfc718
1fed057
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import io
import os
import base64
from io import BytesIO
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
from src.utils.constants import Constants
from src.utils.logging import get_logger

class ImageProcessor:
    """
    Class to handle image processing and description using OpenAI.

    Images may be supplied as a file path (``str``), raw encoded bytes, or a
    PIL ``Image`` object. Images are always re-encoded as JPEG before being
    returned or sent to the API.
    """

    # Pillow modes the JPEG encoder can write directly. Any other mode
    # (e.g. RGBA, LA, P from PNG/GIF sources) must be converted first,
    # otherwise ``Image.save(..., format="JPEG")`` raises OSError.
    _JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

    def __init__(self):
        """
        Initialize the image processor with OpenAI client.

        Loads environment variables from a ``.env`` file (if present) and
        reads ``OPENAI_API_KEY`` from the environment.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is not set or is empty.
            Exception: Any error raised while constructing the OpenAI client
                is logged and re-raised unchanged.
        """
        self.logger = get_logger()
        self.logger.info("Initializing image processor")

        load_dotenv()
        self.openai_api_key = os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:
            self.logger.error("OPENAI_API_KEY environment variable is not set")
            raise ValueError("OPENAI_API_KEY environment variable is not set")

        try:
            self.client = OpenAI(api_key=self.openai_api_key)
            self.logger.info("OpenAI client initialized successfully")
        except Exception as e:
            self.logger.error(f"Failed to initialize OpenAI client: {str(e)}")
            raise

    @classmethod
    def _encode_jpeg(cls, image):
        """
        Encode a PIL Image as JPEG and return the encoded bytes.

        Images whose mode the JPEG encoder cannot write (alpha channel,
        palette, etc.) are converted to RGB first. ``convert`` returns a new
        image, so the argument itself is never modified.
        """
        if image.mode not in cls._JPEG_MODES:
            image = image.convert("RGB")
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return buffered.getvalue()

    def describe_image(self, image_data):
        """
        Generate a description of the image using OpenAI.

        The image is downscaled to fit within 1024x1024 (aspect ratio
        preserved), encoded as JPEG, and sent base64-encoded to the chat
        completions endpoint using ``Constants.MODEL`` and
        ``Constants.IMAGE_DESCRIPTION_PROMPT``.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.
                A PIL Image passed in is not modified.

        Returns:
            str: Description of the image. Empty string when ``image_data`` is
            falsy or when the API response carries no text content.

        Raises:
            Exception: Any error while loading, encoding, or calling the API
                is logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided, returning empty description")
            return ""

        self.logger.info("Processing image for description")
        try:
            # Handle different types of image_data
            if isinstance(image_data, str):
                # Assume it's a file path
                self.logger.debug(f"Loading image from file path: {image_data}")
                with open(image_data, "rb") as image_file:
                    image_bytes = image_file.read()
                image = Image.open(BytesIO(image_bytes))
            elif isinstance(image_data, Image.Image):
                # It's already a PIL Image object. thumbnail() below resizes
                # in place, so work on a copy to leave the caller's image intact.
                self.logger.debug("Using provided PIL Image object")
                image = image_data.copy()
            else:
                # Assume it's bytes
                self.logger.debug("Using provided image bytes")
                image = Image.open(BytesIO(image_data))

            original_size = image.size
            max_size = (1024, 1024)  # OpenAI's recommended max size
            image.thumbnail(max_size, Image.LANCZOS)
            if original_size != image.size:
                self.logger.debug(f"Resized image from {original_size} to {image.size}")

            # Convert to base64
            base64_image = base64.b64encode(self._encode_jpeg(image)).decode("utf-8")
            self.logger.debug("Image converted to base64 for API call")

            # Call OpenAI API
            self.logger.info(f"Calling OpenAI API with model: {Constants.MODEL}")
            response = self.client.chat.completions.create(
                model=Constants.MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that describes images."},
                    {"role": "user", "content": [
                        {"type": "text", "text": Constants.IMAGE_DESCRIPTION_PROMPT},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                    ]}
                ],
                max_tokens=300
            )

            # The message content may be None (e.g. a refusal); treat that as
            # an empty description instead of crashing on .strip().
            content = response.choices[0].message.content
            if content is None:
                self.logger.warning("OpenAI response contained no text content")
                content = ""
            description = content.strip()

            if len(description) > 50:
                description_preview = description[:50] + "..."
            else:
                description_preview = description
            self.logger.info(f"Image description generated: {description_preview}")
            return description
        except Exception as e:
            self.logger.error(f"Error describing image: {str(e)}")
            raise

    def resize_image(self, image_data, max_width=300, max_height=300):
        """
        Resize an image to fit within the specified dimensions while maintaining aspect ratio.

        The image is only ever shrunk (``Image.thumbnail`` semantics); an
        image already within the bounds keeps its size but is still
        re-encoded as JPEG.

        Args:
            image_data: The image data, can be a file path, bytes, or a PIL Image object.
                A PIL Image passed in is not modified.
            max_width (int, optional): Maximum width of the resized image. Defaults to 300.
            max_height (int, optional): Maximum height of the resized image. Defaults to 300.

        Returns:
            bytes: The resized image data, JPEG-encoded. ``None`` when
            ``image_data`` is falsy.

        Raises:
            Exception: Any error while loading, resizing, or encoding is
                logged and re-raised unchanged.
        """
        if not image_data:
            self.logger.debug("No image data provided for resizing, returning None")
            return None

        self.logger.info(f"Resizing image to max dimensions: {max_width}x{max_height}")
        try:
            # Handle different types of image_data
            if isinstance(image_data, str):
                # Assume it's a file path. Read the bytes up front so the file
                # handle is closed deterministically rather than being held
                # open by Pillow's lazy loader.
                self.logger.debug(f"Loading image from file path: {image_data}")
                with open(image_data, "rb") as image_file:
                    image = Image.open(BytesIO(image_file.read()))
            elif isinstance(image_data, Image.Image):
                # It's already a PIL Image object. thumbnail() below resizes
                # in place, so work on a copy to leave the caller's image intact.
                self.logger.debug("Using provided PIL Image object")
                image = image_data.copy()
            else:
                # Assume it's bytes
                self.logger.debug("Loading image from bytes")
                image = Image.open(BytesIO(image_data))

            original_size = image.size
            self.logger.debug(f"Original image size: {original_size}")

            # Resize image
            image.thumbnail((max_width, max_height), Image.LANCZOS)
            new_size = image.size
            self.logger.debug(f"Resized image to: {new_size}")

            # Convert back to bytes
            result = self._encode_jpeg(image)
            self.logger.info(f"Image resized successfully from {original_size} to {new_size}")
            return result
        except Exception as e:
            self.logger.error(f"Error resizing image: {str(e)}")
            raise