File size: 11,171 Bytes
5b6e956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
"""
Composition Service
===================

Business logic for smart multi-image composition.
Builds intelligent prompts based on image types, camera angles, and lighting.
"""

from typing import Optional, List
from PIL import Image

from services.generation_service import GenerationService
from models.generation_request import GenerationRequest
from models.generation_result import GenerationResult
from utils.logging_utils import get_logger
from config.settings import Settings


logger = get_logger(__name__)


class CompositionService(GenerationService):
    """
    Service for intelligent multi-image composition.

    Builds prompts based on:
    - Image types (Subject, Background, Style, etc.)
    - Camera angles and shot types
    - Lighting conditions
    - Custom instructions

    Inherits from GenerationService for generation capabilities.
    """

    # Roles that can be assigned to an input image slot.
    # validate_composition_inputs() rejects any type not listed here.
    # "Not Used" is the placeholder for an empty slot: compose_images() pads
    # the type list with it and build_composition_prompt() leaves such slots
    # out of the generic "Combine ..." sentence.
    IMAGE_TYPES = [
        "Subject/Character",
        "Background/Environment",
        "Style Reference",
        "Product",
        "Texture",
        "Not Used"
    ]

    # Shot framing phrases. build_composition_prompt() interpolates the chosen
    # value verbatim into the prompt, and get_suggested_aspect_ratio() compares
    # against these exact strings.
    # NOTE(review): this list is not referenced elsewhere in this class;
    # presumably it feeds a UI selector - confirm against callers.
    SHOT_TYPES = [
        "close-up shot",
        "medium shot",
        "full body shot",
        "wide shot",
        "extreme close-up",
        "establishing shot"
    ]

    # Camera angle phrases. Selected values are joined with ", " and inserted
    # into the "Shot from a ..." sentence by build_composition_prompt().
    # NOTE(review): not referenced elsewhere in this class; presumably the set
    # of choices offered to the user - confirm against callers.
    CAMERA_ANGLES = [
        "eye-level perspective",
        "low-angle perspective",
        "high-angle perspective",
        "bird's-eye view",
        "Dutch angle (tilted)",
        "over-the-shoulder"
    ]

    # Lighting phrases. "Auto (match images)" is special-cased by
    # build_composition_prompt(): it adds no lighting sentence to the prompt.
    # Any other value is interpolated verbatim.
    LIGHTING_OPTIONS = [
        "Auto (match images)",
        "natural daylight",
        "golden hour sunlight",
        "soft diffused light",
        "dramatic side lighting",
        "backlit silhouette",
        "studio lighting",
        "moody atmospheric lighting",
        "neon/artificial lighting"
    ]

    def __init__(self, api_key: Optional[str] = None) -> None:
        """
        Initialize the composition service.

        All setup is delegated to the parent initializer; this class adds no
        state of its own and only logs that it has been constructed.

        Args:
            api_key: Optional Gemini API key, forwarded unchanged to the
                parent initializer. How the parent treats ``None`` is not
                visible in this module.
        """
        super().__init__(api_key=api_key)
        logger.info("CompositionService initialized")

    def build_composition_prompt(
        self,
        image1_type: str = "Subject/Character",
        image2_type: str = "Background/Environment",
        image3_type: str = "Not Used",
        camera_angles: Optional[List[str]] = None,
        lighting: str = "Auto (match images)",
        shot_type: str = "medium shot",
        custom_instructions: str = "",
        is_character_sheet: bool = False
    ) -> str:
        """
        Build the text prompt describing how the input images are combined.

        Based on Google's best practices for Gemini 2.5 Flash Image:
        - Narrative, descriptive language
        - Camera angles, lens types, lighting
        - Match perspectives and light direction
        - Specific about placement

        The prompt is assembled from sentence fragments in this order:
        the main instruction (character sheet, a recognised role pair, or a
        generic "Combine ..." sentence), camera angles, lighting, perspective
        matching, realism keywords, and finally the custom instructions.

        Args:
            image1_type: Role of the first image
            image2_type: Role of the second image
            image3_type: Role of the third image
            camera_angles: Selected camera angle phrases; ignored in
                character sheet mode
            lighting: Lighting phrase; "Auto (match images)" or an empty
                value adds no lighting sentence
            shot_type: Shot framing phrase, used by the photorealistic
                subject/environment instructions
            custom_instructions: Free text appended verbatim at the end
            is_character_sheet: Whether to request a multi-view character
                sheet instead of a single composed scene

        Returns:
            Formatted prompt string
        """
        unused = "Not Used"
        parts = []

        if is_character_sheet:
            parts.append("Create a character sheet design with multiple views and poses of the same character. ")
            # Keep the sentence grammatical whether or not it is introduced
            # by the "Based on the character ..." clause.
            if image1_type == "Subject/Character":
                parts.append("Based on the character from image one, include front view, side view, back view, and detail shots. ")
            else:
                parts.append("Include front view, side view, back view, and detail shots. ")
            parts.append("Maintain consistent character design, colors, and proportions across all views. ")
            if image2_type in ("Background/Environment", "Style Reference"):
                parts.append(f"Apply the {image2_type.lower()} from image two as context. ")
        else:
            subject = "Subject/Character"
            environment = "Background/Environment"
            style = "Style Reference"

            # Recognised role pairs for images one and two get a tailored
            # instruction; everything else falls back to a generic sentence.
            matched_pair = True
            if image1_type == subject and image2_type == environment:
                parts.append(f"A photorealistic {shot_type} ")
                parts.append("placing the subject from image one into the environment from image two. ")
            elif image1_type == subject and image2_type == style:
                parts.append("Transform the subject from image one ")
                parts.append("into the artistic style shown in image two. ")
            elif image1_type == environment and image2_type == subject:
                parts.append(f"A photorealistic {shot_type} ")
                parts.append("integrating the subject from image two into the environment from image one. ")
            else:
                matched_pair = False
                # Generic multi-image composition. A connector is only placed
                # in front of a reference that is not the first one listed,
                # so skipped slots never leave a dangling "with"/"and".
                slots = (
                    (image1_type, "one", ""),
                    (image2_type, "two", " with "),
                    (image3_type, "three", " and "),
                )
                references = []
                for slot_type, ordinal, connector in slots:
                    if slot_type == unused:
                        continue
                    phrase = f"the {slot_type.lower()} from image {ordinal}"
                    references.append(connector + phrase if references else phrase)
                # With no slot in use there is nothing to combine, so the
                # sentence is omitted rather than emitted as "Combine . ".
                if references:
                    parts.append("Combine " + "".join(references) + ". ")

            # The tailored instructions only mention images one and two;
            # describe the third image too so its role is not left implicit.
            if matched_pair and image3_type != unused:
                parts.append(f"Also incorporate the {image3_type.lower()} from image three. ")

        # Add camera angle specifics (not for character sheets)
        if camera_angles and not is_character_sheet:
            angles_text = ", ".join(camera_angles)
            # Pick "a"/"an" from the first letter ("an eye-level perspective").
            starts_with_vowel = angles_text[:1].lower() in ("a", "e", "i", "o", "u")
            article = "an" if starts_with_vowel else "a"
            parts.append(f"Shot from {article} {angles_text}. ")

        # Add lighting unless it should simply follow the source images
        if lighting and lighting != "Auto (match images)":
            parts.append(f"The scene is illuminated by {lighting}, ")
            parts.append("matching the lighting direction and quality across all elements. ")

        # Add perspective matching (best practice)
        if not is_character_sheet:
            parts.append("Maintain consistent perspective, scale, and depth. ")

        # Add realism keywords
        parts.append("Create a natural, seamless composition with realistic shadows and reflections. ")
        parts.append("Photorealistic, high quality, professional photography.")

        # Add custom instructions
        if custom_instructions:
            parts.append(f" {custom_instructions}")

        return "".join(parts)

    def compose_images(
        self,
        images: List[Optional[Image.Image]],
        image_types: List[str],
        camera_angles: Optional[List[str]] = None,
        lighting: str = "Auto (match images)",
        shot_type: str = "medium shot",
        custom_instructions: str = "",
        is_character_sheet: bool = False,
        aspect_ratio: str = "16:9",
        temperature: float = 0.7,
        backend: str = Settings.BACKEND_GEMINI
    ) -> GenerationResult:
        """
        Build a composition prompt for the given images and run generation.

        Empty (``None``) image slots are dropped together with their type, as
        are images that have no matching entry in ``image_types``. The
        remaining types are padded with "Not Used" up to three entries and fed
        to ``build_composition_prompt``; the remaining images are sent as the
        request's input images.

        Args:
            images: List of up to 3 images (None for unused slots)
            image_types: Image types, positionally matching ``images``
            camera_angles: Selected camera angles
            lighting: Lighting option
            shot_type: Shot type
            custom_instructions: Custom instructions
            is_character_sheet: Character sheet mode
            aspect_ratio: Output aspect ratio
            temperature: Generation temperature
            backend: Backend to use

        Returns:
            GenerationResult from the router, or an error result when no
            usable image was supplied or any exception was raised.
        """
        try:
            # Keep each non-empty slot paired with its declared type
            supplied = [
                (picture, image_types[slot])
                for slot, picture in enumerate(images)
                if picture is not None and slot < len(image_types)
            ]

            if not supplied:
                logger.error("No valid images provided")
                return GenerationResult.error_result("No images provided for composition")

            valid_images = [picture for picture, _ in supplied]
            slot_types = [declared for _, declared in supplied]

            # The prompt builder always expects three type slots
            slot_types.extend(["Not Used"] * (3 - len(slot_types)))
            first_type, second_type, third_type = slot_types[:3]

            prompt = self.build_composition_prompt(
                image1_type=first_type,
                image2_type=second_type,
                image3_type=third_type,
                camera_angles=camera_angles or [],
                lighting=lighting,
                shot_type=shot_type,
                custom_instructions=custom_instructions,
                is_character_sheet=is_character_sheet
            )

            logger.info(f"Composition prompt: {prompt[:200]}...")

            request = GenerationRequest(
                prompt=prompt,
                backend=backend,
                aspect_ratio=aspect_ratio,
                temperature=temperature,
                input_images=valid_images
            )

            # self.router is not defined in this class; presumably set up by
            # the parent initializer.
            outcome = self.router.generate(request)

            if not outcome.success:
                logger.warning(f"Composition failed: {outcome.message}")
            else:
                logger.info("Composition generated successfully")

            return outcome

        except Exception as e:
            # Boundary handler: report the failure as a result object instead
            # of letting the exception reach the caller.
            failure = f"Composition error: {e}"
            logger.exception(failure)
            return GenerationResult.error_result(failure)

    def get_suggested_aspect_ratio(
        self,
        shot_type: str,
        is_character_sheet: bool = False
    ) -> str:
        """
        Recommend an output aspect ratio for a composition.

        Args:
            shot_type: Shot type phrase
            is_character_sheet: Character sheet mode; takes precedence over
                the shot type

        Returns:
            "16:9" for character sheets and wide framings, "3:4" for
            close-ups, and "1:1" for anything else.
        """
        # Character sheets always use the wide multi-view layout
        if is_character_sheet:
            return "16:9"

        # (shot types, ratio) pairs, checked in order
        ratio_table = (
            (("full body shot", "establishing shot", "wide shot"), "16:9"),  # landscape
            (("close-up shot", "extreme close-up"), "3:4"),  # portrait
        )
        for shots, ratio in ratio_table:
            if shot_type in shots:
                return ratio

        # Square for balanced compositions
        return "1:1"

    def validate_composition_inputs(
        self,
        images: List[Optional[Image.Image]],
        image_types: List[str]
    ) -> tuple[bool, Optional[str]]:
        """
        Check that the images and their declared types form a usable request.

        The checks run in order and the first failure is reported:
        at least one image is not ``None``, there are at least as many types
        as image slots, and every type appears in ``IMAGE_TYPES``.

        Args:
            images: List of images (``None`` for empty slots)
            image_types: List of image types

        Returns:
            Tuple of (is_valid: bool, error_message: Optional[str]); the
            message is ``None`` when the inputs are valid.
        """
        # Every slot empty (or no slots at all) means nothing to compose
        if all(candidate is None for candidate in images):
            return False, "At least one image is required"

        # Each image slot needs a declared type
        declared_count, slot_count = len(image_types), len(images)
        if declared_count < slot_count:
            return False, "Image types must be specified for all images"

        # Report the first type that is not one of the known options
        for declared_type in image_types:
            if declared_type in self.IMAGE_TYPES:
                continue
            return False, f"Invalid image type: {declared_type}"

        return True, None