# Scraped file-view header: Spaces (status: Sleeping), file size 11,171 bytes, revision 5b6e956.
"""
Composition Service
===================
Business logic for smart multi-image composition.
Builds intelligent prompts based on image types, camera angles, and lighting.
"""
from typing import Optional, List
from PIL import Image
from services.generation_service import GenerationService
from models.generation_request import GenerationRequest
from models.generation_result import GenerationResult
from utils.logging_utils import get_logger
from config.settings import Settings
logger = get_logger(__name__)
class CompositionService(GenerationService):
    """
    Service for intelligent multi-image composition.

    Builds prompts based on:
    - Image types (Subject, Background, Style, etc.)
    - Camera angles and shot types
    - Lighting conditions
    - Custom instructions

    Inherits from GenerationService for generation capabilities.
    """

    # Option values that mean "nothing selected"; they are compared against
    # in several methods, so they are named once here.
    _NOT_USED = "Not Used"
    _AUTO_LIGHTING = "Auto (match images)"

    # Image type options
    IMAGE_TYPES = [
        "Subject/Character",
        "Background/Environment",
        "Style Reference",
        "Product",
        "Texture",
        "Not Used"
    ]

    # Shot type options
    SHOT_TYPES = [
        "close-up shot",
        "medium shot",
        "full body shot",
        "wide shot",
        "extreme close-up",
        "establishing shot"
    ]

    # Camera angle options
    CAMERA_ANGLES = [
        "eye-level perspective",
        "low-angle perspective",
        "high-angle perspective",
        "bird's-eye view",
        "Dutch angle (tilted)",
        "over-the-shoulder"
    ]

    # Lighting options
    LIGHTING_OPTIONS = [
        "Auto (match images)",
        "natural daylight",
        "golden hour sunlight",
        "soft diffused light",
        "dramatic side lighting",
        "backlit silhouette",
        "studio lighting",
        "moody atmospheric lighting",
        "neon/artificial lighting"
    ]

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize composition service.

        Args:
            api_key: Optional Gemini API key, forwarded to GenerationService.
        """
        super().__init__(api_key=api_key)
        logger.info("CompositionService initialized")

    @staticmethod
    def _indefinite_article(phrase: str) -> str:
        """Return "an" when *phrase* starts with a vowel letter, else "a".

        This is a spelling-based heuristic; it is sufficient for the entries
        in CAMERA_ANGLES ("eye-level ...", "over-the-shoulder").
        """
        return "an" if phrase[:1].lower() in ("a", "e", "i", "o", "u") else "a"

    @classmethod
    def _combine_sentence(cls, image_types: List[str]) -> str:
        """Build the generic "Combine ..." sentence from up to three slot types.

        Slots that are empty or set to "Not Used" are skipped. The connector
        belonging to a slot (" with " for image two, " and " for image three)
        is only emitted when an earlier slot has already been described, so
        the sentence never starts with a dangling connector.
        """
        slots = (("one", ""), ("two", " with "), ("three", " and "))
        described: List[str] = []
        for img_type, (ordinal, connector) in zip(image_types, slots):
            if not img_type or img_type == cls._NOT_USED:
                continue
            lead = connector if described else ""
            described.append(f"{lead}the {img_type.lower()} from image {ordinal}")

        if not described:
            # No slot is in use: avoid emitting the broken sentence "Combine . ".
            return "Combine the provided images. "
        return "Combine " + "".join(described) + ". "

    def build_composition_prompt(
        self,
        image1_type: str = "Subject/Character",
        image2_type: str = "Background/Environment",
        image3_type: str = "Not Used",
        camera_angles: Optional[List[str]] = None,
        lighting: str = "Auto (match images)",
        shot_type: str = "medium shot",
        custom_instructions: str = "",
        is_character_sheet: bool = False
    ) -> str:
        """
        Build intelligent composition prompt.

        Based on Google's best practices for Gemini 2.5 Flash Image:
        - Narrative, descriptive language
        - Camera angles, lens types, lighting
        - Match perspectives and light direction
        - Specific about placement

        Args:
            image1_type: Type of first image
            image2_type: Type of second image
            image3_type: Type of third image
            camera_angles: List of selected camera angles (ignored in
                character sheet mode)
            lighting: Lighting description; "Auto (match images)" adds no
                explicit lighting sentence
            shot_type: Type of shot
            custom_instructions: Additional instructions, appended verbatim
                (after trimming surrounding whitespace) at the end
            is_character_sheet: Whether to generate character sheet

        Returns:
            Formatted prompt string
        """
        parts: List[str] = []

        if is_character_sheet:
            # Character sheet specific handling
            parts.append("Create a character sheet design with multiple views and poses of the same character. ")
            if image1_type == "Subject/Character":
                # The lead-in clause ends with a comma, so the sentence
                # continues in lower case.
                parts.append("Based on the character from image one, include front view, side view, back view, and detail shots. ")
            else:
                parts.append("Include front view, side view, back view, and detail shots. ")
            parts.append("Maintain consistent character design, colors, and proportions across all views. ")
            if image2_type in ("Background/Environment", "Style Reference"):
                parts.append(f"Apply the {image2_type.lower()} from image two as context. ")
        else:
            # Determine main action based on image types
            pairing = (image1_type, image2_type)
            dedicated = True
            if pairing == ("Subject/Character", "Background/Environment"):
                parts.append(f"A photorealistic {shot_type} ")
                parts.append("placing the subject from image one into the environment from image two. ")
            elif pairing == ("Subject/Character", "Style Reference"):
                parts.append("Transform the subject from image one ")
                parts.append("into the artistic style shown in image two. ")
            elif pairing == ("Background/Environment", "Subject/Character"):
                parts.append(f"A photorealistic {shot_type} ")
                parts.append("integrating the subject from image two into the environment from image one. ")
            else:
                # Generic multi-image composition
                dedicated = False
                parts.append(self._combine_sentence([image1_type, image2_type, image3_type]))

            # The dedicated sentences above only talk about images one and
            # two; mention a supplied third image so it is not ignored.
            if dedicated and image3_type and image3_type != self._NOT_USED:
                parts.append(f"Also incorporate the {image3_type.lower()} from image three. ")

        # Add camera angle specifics (not for character sheets)
        if camera_angles and not is_character_sheet:
            angles_text = ", ".join(camera_angles)
            article = self._indefinite_article(angles_text)
            parts.append(f"Shot from {article} {angles_text}. ")

        # Add lighting
        if lighting and lighting != self._AUTO_LIGHTING:
            parts.append(f"The scene is illuminated by {lighting}, ")
            parts.append("matching the lighting direction and quality across all elements. ")

        # Add perspective matching (best practice)
        if not is_character_sheet:
            parts.append("Maintain consistent perspective, scale, and depth. ")

        # Add realism keywords
        parts.append("Create a natural, seamless composition with realistic shadows and reflections. ")
        parts.append("Photorealistic, high quality, professional photography.")

        # Add custom instructions; tolerate None and whitespace-only input
        extra = (custom_instructions or "").strip()
        if extra:
            parts.append(f" {extra}")

        return "".join(parts)

    def compose_images(
        self,
        images: List[Optional[Image.Image]],
        image_types: List[str],
        camera_angles: Optional[List[str]] = None,
        lighting: str = "Auto (match images)",
        shot_type: str = "medium shot",
        custom_instructions: str = "",
        is_character_sheet: bool = False,
        aspect_ratio: str = "16:9",
        temperature: float = 0.7,
        backend: str = Settings.BACKEND_GEMINI
    ) -> GenerationResult:
        """
        Compose images using intelligent prompt generation.

        Empty image slots are dropped before the prompt is built, so
        "image one/two/three" in the prompt refers to the position of each
        image in the list that is actually sent to the backend.

        Args:
            images: List of up to 3 images (None for unused slots)
            image_types: List of image types corresponding to images
            camera_angles: Selected camera angles
            lighting: Lighting option
            shot_type: Shot type
            custom_instructions: Custom instructions
            is_character_sheet: Character sheet mode
            aspect_ratio: Output aspect ratio
            temperature: Generation temperature
            backend: Backend to use

        Returns:
            GenerationResult object. Any exception raised while building or
            running the request is logged and converted into an error result.
        """
        try:
            # Keep only populated slots. zip() stops at the shorter list, so
            # an image without a declared type is skipped as well.
            populated = [
                (img, img_type)
                for img, img_type in zip(images, image_types)
                if img is not None
            ]
            if not populated:
                logger.error("No valid images provided")
                return GenerationResult.error_result("No images provided for composition")

            valid_images = [img for img, _ in populated]
            valid_types = [img_type for _, img_type in populated]

            # Pad types to 3 elements (a non-positive count adds nothing)
            valid_types += [self._NOT_USED] * (3 - len(valid_types))

            # Build prompt
            prompt = self.build_composition_prompt(
                image1_type=valid_types[0],
                image2_type=valid_types[1],
                image3_type=valid_types[2],
                camera_angles=camera_angles or [],
                lighting=lighting,
                shot_type=shot_type,
                custom_instructions=custom_instructions,
                is_character_sheet=is_character_sheet
            )
            logger.info(f"Composition prompt: {prompt[:200]}...")

            # Create request
            request = GenerationRequest(
                prompt=prompt,
                backend=backend,
                aspect_ratio=aspect_ratio,
                temperature=temperature,
                input_images=valid_images
            )

            # Generate; self.router is not defined in this class and is
            # presumably set up by GenerationService -- confirm in the parent.
            result = self.router.generate(request)
            if result.success:
                logger.info("Composition generated successfully")
            else:
                logger.warning(f"Composition failed: {result.message}")
            return result
        except Exception as e:
            # Service boundary: report the failure as a result object
            # instead of propagating it to the caller.
            logger.exception(f"Composition error: {e}")
            return GenerationResult.error_result(f"Composition error: {e}")

    def get_suggested_aspect_ratio(
        self,
        shot_type: str,
        is_character_sheet: bool = False
    ) -> str:
        """
        Suggest aspect ratio based on composition type.

        Args:
            shot_type: Shot type
            is_character_sheet: Character sheet mode

        Returns:
            Suggested aspect ratio string: "16:9", "3:4" or "1:1"
        """
        if is_character_sheet:
            return "16:9"  # Wide format for multi-view layout
        if shot_type in ("full body shot", "establishing shot", "wide shot"):
            return "16:9"  # Landscape for wide shots
        if shot_type in ("close-up shot", "extreme close-up"):
            return "3:4"  # Portrait for closeups
        return "1:1"  # Square for balanced compositions

    def validate_composition_inputs(
        self,
        images: List[Optional[Image.Image]],
        image_types: List[str]
    ) -> tuple[bool, Optional[str]]:
        """
        Validate composition inputs.

        Args:
            images: List of images (None for unused slots)
            image_types: List of image types

        Returns:
            Tuple of (is_valid: bool, error_message: Optional[str]);
            error_message is None when the inputs are valid.
        """
        # Check at least one image provided
        if all(img is None for img in images):
            return False, "At least one image is required"

        # Check image types length matches
        if len(image_types) < len(images):
            return False, "Image types must be specified for all images"

        # Check for valid image types
        for img_type in image_types:
            if img_type not in self.IMAGE_TYPES:
                return False, f"Invalid image type: {img_type}"

        return True, None
|