# Virtual-try-on / app/services/image_service.py
# Hammad712's picture
# Update app/services/image_service.py
# 74dbc5e verified
import base64
import logging
import json
from io import BytesIO
from typing import Optional, Dict, Any, Tuple, List
from fastapi import HTTPException
from PIL import Image
from google.genai import types
# Import initialized clients (ensure flux_client is defined in app/core/clients.py)
from app.core.clients import llm, prompt_template, genai_client, flux_client, genai_client1
# Module-level logger named after this module; used by the service functions below.
logger = logging.getLogger(__name__)
# --- Virtual Try-On Instruction Template (Updated for 3 Inputs) ---
# Instruction text that virtual_try_on() sends to the model as the first item
# of `contents`, ahead of the images. The "Input N" numbering used in the text
# follows the order in which the images are added there: dress, person, shoes.
# The text asks for one composite image on a white background plus a short
# JSON summary, which virtual_try_on() then tries to parse as JSON.
VIRTUAL_TRY_ON_PROMPT = """
You are an expert image synthesis agent. Your PRIMARY and MANDATORY directive is to generate a high-fidelity virtual try-on image using the three provided inputs against a clean background.
- Input 1 (dress reference): the garment to be transferred.
- Input 2 (person): the target person.
- Input 3 (shoes reference): the footwear.
CRITICAL: You MUST generate an image output in every single response. Do not refuse to generate an image. Make your best attempt at a photorealistic composition.
Positive instructions (Do):
1. Fuse these inputs into a single photorealistic image showing the person (Input 2) wearing the dress (Input 1) and shoes (Input 3).
2. **Set the entire background to solid, seamless, plain white.**
3. Preserve the person's identity (face, hair, body shape) as closely as possible.
4. Retain the exact textures and details of the dress and shoes.
5. Ensure the lighting is bright, clean, studio-quality light that complements the white background.
6. Return a short JSON summary alongside the image.
Negative instructions (Do NOT):
1. Do NOT return only text. An image is required.
2. **Do NOT retain any background elements, scenery, walls, or furniture from the original inputs. The background must be completely empty.**
3. Do NOT change the person's face or gender.
4. Do NOT add watermarks, text overlays, or multiple angles (collages).
5. Do NOT alter the core design of the dress or shoes.
Output requirements:
- MANDATORY: 1 Photorealistic Image file isolated on a solid white background.
- Secondary: JSON text summary: {"success": true, "notes": "Image generated successfully on white background"}.
"""
# ===============================================================
# 🔹 PROMPT ENHANCEMENT
# ===============================================================
def enhance_user_prompt(raw_prompt: str) -> str:
    """Rewrite a raw user prompt into an enhanced one via the configured LLM.

    The raw text is rendered through ``prompt_template`` (as the
    ``Raw_Prompt`` variable) and the rendered prompt is passed to ``llm``.

    Args:
        raw_prompt: Prompt text as supplied by the user.

    Returns:
        The ``content`` attribute of the LLM response.

    Raises:
        Exception: Any error raised while invoking the LLM (or reading its
            response) is logged with its traceback and re-raised unchanged.
    """
    # Only a short preview is logged so long prompts do not flood the log.
    preview = raw_prompt[:50]
    logger.info(f"Enhancing prompt: {preview}...")

    # Template rendering happens outside the guarded section, so template
    # errors propagate without the "prompt enhancement" error log entry.
    llm_input = prompt_template.invoke({"Raw_Prompt": raw_prompt})

    try:
        llm_output = llm.invoke(llm_input)
        logger.info("Prompt enhancement successful.")
        return llm_output.content
    except Exception as err:
        logger.error(f"Error during prompt enhancement: {err}", exc_info=True)
        raise
# ===============================================================
# 🔹 IMAGE GENERATION (TEXT → IMAGE) WITH FALLBACK
# ===============================================================
def generate_image_from_text(image_prompt: str) -> tuple[Optional[str], Optional[BytesIO]]:
    """Generate an image from a text prompt, trying Gemini first and Flux second.

    Flux is used as a fallback both when the Gemini call raises and when
    Gemini answers without any image part (for example a text-only reply).
    Previously a text-only reply was logged as a success and returned with
    ``None`` as the image, bypassing the fallback entirely.

    Args:
        image_prompt: Text description of the image to generate.

    Returns:
        A ``(text, image)`` tuple. ``text`` is the last non-empty text part
        returned by Gemini, or ``None`` when there was none or when the image
        came from Flux. ``image`` is a ``BytesIO`` holding the image bytes
        (PNG-encoded when produced by the Flux fallback).

    Raises:
        Exception: Whatever the Flux fallback raises, after it was logged.
    """
    logger.info(f"Generating image with prompt: {image_prompt[:50]}...")
    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])

    generated_text: Optional[str] = None
    generated_image_bytes: Optional[BytesIO] = None
    try:
        # Try Gemini first
        response = genai_client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=image_prompt,
            config=generation_config,
        )
        for part in response.candidates[0].content.parts:
            if part.text:
                generated_text = part.text
            elif getattr(part, "inline_data", None):
                generated_image_bytes = BytesIO(part.inline_data.data)
    except Exception as e:
        logger.warning(f"Gemini image generation failed: {e}. Falling back to Flux...")
    else:
        if generated_image_bytes is not None:
            logger.info("Gemini image generation successful.")
            return generated_text, generated_image_bytes
        # A response without an image part is not a usable result for an
        # image-generation call, so it is handled like a failure.
        logger.warning("Gemini returned no image data. Falling back to Flux...")

    try:
        image = flux_client.text_to_image(
            image_prompt,
            model="black-forest-labs/FLUX.1-dev",
        )
        # The Flux client result is serialised to PNG and rewound so that
        # callers can read the buffer from the start.
        buf = BytesIO()
        image.save(buf, format="PNG")
        buf.seek(0)
        return None, buf
    except Exception as flux_error:
        logger.error(f"Flux fallback failed: {flux_error}", exc_info=True)
        raise
# ===============================================================
# 🔹 IMAGE UPDATE (EDIT EXISTING IMAGE WITH TEXT)
# ===============================================================
def update_image_with_text(text_instruction: str, image_bytes: bytes) -> tuple[Optional[str], Optional[BytesIO]]:
    """Edit an existing image according to a text instruction.

    Gemini receives both the instruction and the decoded image. If the Gemini
    call raises, a Flux text-to-image request is issued instead.

    Args:
        text_instruction: Natural-language description of the desired edit.
        image_bytes: Encoded bytes of the image to edit.

    Returns:
        A ``(text, image)`` tuple. ``text`` is the last non-empty text part
        returned by Gemini (``None`` if absent or if Flux produced the
        result). ``image`` is a ``BytesIO`` with the resulting image, or
        ``None`` when Gemini answered without an image part.

    Raises:
        ValueError: If ``image_bytes`` cannot be decoded as an image. The
            underlying decoder error is attached as ``__cause__``.
        Exception: Whatever the Flux fallback raises, after it was logged.
    """
    logger.info("Opening image for update...")
    try:
        image = Image.open(BytesIO(image_bytes))
        # Image.open() only identifies the file; pixel data is read lazily.
        # Decode now so corrupt or truncated uploads are rejected here as
        # invalid input instead of failing later inside the Gemini call and
        # silently triggering the Flux fallback.
        image.load()
    except Exception as exc:
        raise ValueError("Invalid image data. Upload a valid image file.") from exc

    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])
    try:
        response = genai_client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[text_instruction, image],
            config=generation_config,
        )
        logger.info("Gemini image update successful.")
        updated_text, updated_image_bytes = None, None
        for part in response.candidates[0].content.parts:
            if part.text:
                updated_text = part.text
            elif getattr(part, "inline_data", None):
                updated_image_bytes = BytesIO(part.inline_data.data)
        return updated_text, updated_image_bytes
    except Exception as e:
        logger.warning(f"Gemini update failed: {e}. Falling back to Flux edit...")
        try:
            # NOTE(review): this fallback passes only text to Flux; the
            # uploaded image is not sent, so the result is a newly generated
            # image rather than an edit of the original. Confirm this is the
            # intended degradation.
            flux_image = flux_client.text_to_image(
                f"Edit image based on instruction: {text_instruction}",
                model="black-forest-labs/FLUX.1-dev",
            )
            buf = BytesIO()
            flux_image.save(buf, format="PNG")
            buf.seek(0)
            return None, buf
        except Exception as flux_error:
            logger.error(f"Flux fallback failed: {flux_error}", exc_info=True)
            raise
# ===============================================================
# 🔹 VIRTUAL TRY-ON (DRESS + PERSON + SHOES)
# ===============================================================
def virtual_try_on(
    dress_image_bytes: bytes,
    person_image_bytes: bytes,
    shoes_image_bytes: Optional[bytes] = None
) -> tuple[Dict[str, Any], Optional[BytesIO]]:
    """Perform a virtual try-on with Gemini (shoes are optional).

    The model receives ``VIRTUAL_TRY_ON_PROMPT`` followed by the dress image,
    the person image and, when supplied, the shoes image. When no shoes image
    is supplied, a short text note takes its place so the model is not left
    waiting for the "Input 3" that the prompt describes.

    Args:
        dress_image_bytes: Encoded bytes of the garment reference image.
        person_image_bytes: Encoded bytes of the target person image.
        shoes_image_bytes: Encoded bytes of the footwear reference image, or
            ``None``/empty to skip footwear.

    Returns:
        A ``(summary, image)`` tuple. ``summary`` is always a dict: the JSON
        object returned by the model when it can be parsed, otherwise
        ``{"success": False, "notes": <raw text>}``; if the model returns no
        text at all, ``{"success": False, "notes": "No text response."}``.
        ``image`` is a ``BytesIO`` with the generated image, or ``None`` when
        the response contained no image part.

    Raises:
        ValueError: If any supplied image cannot be decoded.
        HTTPException: With status 500 when the model call or the handling of
            its response fails.
    """
    logger.info("Opening images for virtual try-on...")
    try:
        dress_image = Image.open(BytesIO(dress_image_bytes))
        person_image = Image.open(BytesIO(person_image_bytes))
        # Prepare content list with mandatory items
        contents = [VIRTUAL_TRY_ON_PROMPT, dress_image, person_image]
        input_images = [dress_image, person_image]
        if shoes_image_bytes:
            shoes_image = Image.open(BytesIO(shoes_image_bytes))
            contents.append(shoes_image)  # Appends as Input 3
            input_images.append(shoes_image)
        else:
            # The prompt announces three inputs; state explicitly that the
            # third one is absent instead of relying on the model to guess.
            contents.append(
                "Input 3 (shoes reference) was not provided. Ignore every "
                "shoe-related instruction and keep the footwear the person "
                "is already wearing in Input 2."
            )
        # Image.open() is lazy; decode now so corrupt data is reported as
        # invalid input rather than as a model failure further down.
        for input_image in input_images:
            input_image.load()
    except Exception as exc:
        raise ValueError("Invalid image data provided.") from exc

    try:
        config = types.GenerateContentConfig(response_modalities=["Text", "Image"])
        # Make the API call with all collected contents
        response = genai_client1.models.generate_content(
            model="gemini-2.5-flash-image",  # Ensure this model supports image gen
            contents=contents,
            config=config,
        )
        logger.info("Gemini virtual try-on successful.")
        result_summary: Dict[str, Any] = {"success": False, "notes": "No text response."}
        result_image_bytes = None
        for part in response.candidates[0].content.parts:
            if part.text:
                result_summary = _parse_try_on_summary(part.text)
            elif getattr(part, "inline_data", None):
                result_image_bytes = BytesIO(part.inline_data.data)
        return result_summary, result_image_bytes
    except Exception as e:
        logger.warning(f"Gemini try-on failed: {e}")
        # No fallback model exists for try-on; surface the failure as HTTP 500
        # while keeping the original exception attached as the cause.
        raise HTTPException(status_code=500, detail=f"Model generation failed: {str(e)}") from e


def _parse_try_on_summary(text: str) -> Dict[str, Any]:
    """Turn the model's text reply into a summary dict, tolerating Markdown code fences."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop a surrounding ```json ... ``` fence before decoding.
        cleaned = cleaned.strip("`").strip()
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        return {"success": False, "notes": text}
    # Valid JSON that is not an object (list, string, number) cannot serve as
    # the summary dict promised by the return type.
    if isinstance(parsed, dict):
        return parsed
    return {"success": False, "notes": text}
# ===============================================================
# 🔹 SHOE IMAGE GENERATION (UPDATED FOR SINGLE PROMPT)
# ===============================================================
def generate_shoe_images(prompt: str) -> Tuple[Optional[str], List[BytesIO]]:
    """Generate photorealistic shoe product images from a raw user prompt.

    The user description is embedded in a fixed instruction block that asks
    for a white studio background. Gemini is tried first. Flux is used when
    the Gemini call raises or when its response contains no image part
    (previously that case returned an empty list without trying Flux).

    Args:
        prompt: The user's description of the shoe.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the last non-empty text part
        returned by Gemini (``None`` if absent or if Flux produced the
        result). ``images`` holds one ``BytesIO`` per image part returned by
        Gemini, or a single PNG buffer when the Flux fallback was used.

    Raises:
        Exception: Whatever the Flux fallback raises, after it was logged.
    """
    logger.info("Generating shoe images from prompt...")
    # Wrap the prompt to ensure white background and high quality
    final_prompt = (
        "\n"
        "Produce a photorealistic product image of a shoe. with white background\n"
        f"User Description: {prompt}\n"
        "Requirements:\n"
        "- Background: solid plain white / studio background.\n"
        "- Quality: High detail, 8k resolution, realistic textures, clean studio lighting.\n"
        "- Focus: The entire shoe must be visible.\n"
    )
    config = types.GenerateContentConfig(response_modalities=["Text", "Image"])

    generated_text: Optional[str] = None
    images: List[BytesIO] = []
    try:
        # Try Gemini First
        response = genai_client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=final_prompt,
            config=config,
        )
        for part in response.candidates[0].content.parts:
            if part.text:
                generated_text = part.text
            elif getattr(part, "inline_data", None):
                images.append(BytesIO(part.inline_data.data))
    except Exception as e:
        logger.warning(f"Gemini shoe generation failed: {e}. Falling back to Flux...")
    else:
        if images:
            return generated_text, images
        # A text-only answer carries no usable product image; treat it like a
        # failed call and let Flux try.
        logger.warning("Gemini returned no shoe image. Falling back to Flux...")

    try:
        # Fallback to Flux
        image = flux_client.text_to_image(
            final_prompt,
            model="black-forest-labs/FLUX.1-dev",
        )
        buf = BytesIO()
        image.save(buf, format="PNG")
        buf.seek(0)
        return None, [buf]
    except Exception as flux_error:
        logger.error(f"Flux fallback failed: {flux_error}", exc_info=True)
        raise