Spaces:

Hammad712
/

Virtual-try-on

Runtime error

App Files Files Community

Virtual-try-on / app /services /image_service.py

Hammad712

Update app/services/image_service.py

74dbc5e verified 24 days ago

raw

history blame contribute delete

10.8 kB

	import base64
	import logging
	import json
	from io import BytesIO
	from typing import Optional, Dict, Any, Tuple, List

	from fastapi import HTTPException
	from PIL import Image
	from google.genai import types

	# Import initialized clients (ensure flux_client is defined in app/core/clients.py)
	from app.core.clients import llm, prompt_template, genai_client, flux_client, genai_client1

	logger = logging.getLogger(__name__)

	# --- Virtual Try-On Instruction Template (Updated for 3 Inputs) ---
	# Instruction text that virtual_try_on() places first in the `contents` list
	# it sends to the model. The "Input N" numbering below follows the order in
	# which that function adds the images after this prompt: dress image, person
	# image, then the shoes image (appended only when the caller supplies one).
	VIRTUAL_TRY_ON_PROMPT = """
	You are an expert image synthesis agent. Your PRIMARY and MANDATORY directive is to generate a high-fidelity virtual try-on image using the three provided inputs against a clean background.

	- Input 1 (dress reference): the garment to be transferred.
	- Input 2 (person): the target person.
	- Input 3 (shoes reference): the footwear.

	CRITICAL: You MUST generate an image output in every single response. Do not refuse to generate an image. Make your best attempt at a photorealistic composition.

	Positive instructions (Do):
	1. Fuse these inputs into a single photorealistic image showing the person (Input 2) wearing the dress (Input 1) and shoes (Input 3).
	2. Set the entire background to solid, seamless, plain white.
	3. Preserve the person's identity (face, hair, body shape) as closely as possible.
	4. Retain the exact textures and details of the dress and shoes.
	5. Ensure the lighting is bright, clean, studio-quality light that complements the white background.
	6. Return a short JSON summary alongside the image.

	Negative instructions (Do NOT):
	1. Do NOT return only text. An image is required.
	2. Do NOT retain any background elements, scenery, walls, or furniture from the original inputs. The background must be completely empty.
	3. Do NOT change the person's face or gender.
	4. Do NOT add watermarks, text overlays, or multiple angles (collages).
	5. Do NOT alter the core design of the dress or shoes.

	Output requirements:
	- MANDATORY: 1 Photorealistic Image file isolated on a solid white background.
	- Secondary: JSON text summary: {"success": true, "notes": "Image generated successfully on white background"}.
	"""


	# ===============================================================
	# 🔹 PROMPT ENHANCEMENT
	# ===============================================================
	def enhance_user_prompt(raw_prompt: str) -> str:
	    """Rewrite a raw user prompt into an enhanced one via the LLM.

	    The raw text is rendered through ``prompt_template`` (as the
	    ``Raw_Prompt`` variable) and the rendered prompt is handed to ``llm``.

	    Args:
	        raw_prompt: Prompt text as supplied by the user. Only its first 50
	            characters are written to the log.

	    Returns:
	        The ``content`` attribute of the LLM response.

	    Raises:
	        Exception: Any error raised while invoking the LLM (or reading its
	            ``content``) is logged with its traceback and re-raised as-is.
	    """
	    logger.info(f"Enhancing prompt: {raw_prompt[:50]}...")
	    template_variables = {"Raw_Prompt": raw_prompt}
	    llm_input = prompt_template.invoke(template_variables)

	    try:
	        llm_reply = llm.invoke(llm_input)
	        logger.info("Prompt enhancement successful.")
	        return llm_reply.content
	    except Exception as e:
	        # Log here so the failure is visible even if the caller swallows it.
	        logger.error(f"Error during prompt enhancement: {e}", exc_info=True)
	        raise


	# ===============================================================
	# 🔹 IMAGE GENERATION (TEXT → IMAGE) WITH FALLBACK
	# ===============================================================
	def generate_image_from_text(image_prompt: str) -> tuple[Optional[str], Optional[BytesIO]]:
	    """Generate an image from a text prompt, trying Gemini first, then Flux.

	    The Flux fallback runs when the Gemini call raises *and* when Gemini
	    replies without any image part (for example a text-only answer), so a
	    text-only Gemini reply is no longer reported as a success with no image.

	    Args:
	        image_prompt: Text description of the image to generate. Only its
	            first 50 characters are written to the log.

	    Returns:
	        A ``(text, image)`` tuple. ``text`` is the last text part Gemini
	        returned, or ``None`` (always ``None`` when Flux produced the
	        image). ``image`` is a ``BytesIO`` with the image bytes; the Flux
	        image is PNG-encoded and rewound to position 0.

	    Raises:
	        Exception: Whatever the Flux client raises if the fallback also
	            fails; it is logged with its traceback and re-raised.
	    """
	    logger.info("Generating image with prompt: %s...", image_prompt[:50])

	    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])

	    try:
	        # Try Gemini first
	        response = genai_client.models.generate_content(
	            model="gemini-2.5-flash-image",
	            contents=image_prompt,
	            config=generation_config
	        )

	        generated_text, generated_image_bytes = None, None
	        # A missing/empty candidate list raises here and is handled like any
	        # other Gemini failure by the except clause below.
	        for part in response.candidates[0].content.parts:
	            if part.text:
	                generated_text = part.text
	            elif getattr(part, "inline_data", None):
	                generated_image_bytes = BytesIO(part.inline_data.data)
	    except Exception as e:
	        logger.warning("Gemini image generation failed: %s. Falling back to Flux...", e)
	    else:
	        if generated_image_bytes is not None:
	            logger.info("Gemini image generation successful.")
	            return generated_text, generated_image_bytes
	        # The call succeeded but carried no image: treat it as a failure so
	        # the caller still gets an image whenever Flux can provide one.
	        logger.warning("Gemini returned no image data. Falling back to Flux...")

	    try:
	        flux_image = flux_client.text_to_image(
	            image_prompt,
	            model="black-forest-labs/FLUX.1-dev"
	        )
	        buf = BytesIO()
	        flux_image.save(buf, format="PNG")
	        buf.seek(0)  # rewind so callers can read from the start
	        return None, buf
	    except Exception as flux_error:
	        logger.error("Flux fallback failed: %s", flux_error, exc_info=True)
	        raise


	# ===============================================================
	# 🔹 IMAGE UPDATE (EDIT EXISTING IMAGE WITH TEXT)
	# ===============================================================
	def update_image_with_text(text_instruction: str, image_bytes: bytes) -> tuple[Optional[str], Optional[BytesIO]]:
	    """Edit an existing image according to a text instruction.

	    Gemini is tried first with the instruction and the opened image. If the
	    Gemini call raises, Flux is used as a fallback.

	    Args:
	        text_instruction: Description of the change to apply to the image.
	        image_bytes: Raw bytes of the image to edit.

	    Returns:
	        A ``(text, image)`` tuple. From Gemini, ``text`` is the last text
	        part returned (or ``None``) and ``image`` is a ``BytesIO`` with the
	        returned image bytes, or ``None`` if Gemini sent no image part. From
	        the Flux fallback, ``text`` is ``None`` and ``image`` is a
	        PNG-encoded ``BytesIO`` rewound to position 0.

	    Raises:
	        ValueError: If ``image_bytes`` cannot be opened as an image. The
	            underlying error is attached as ``__cause__``.
	        Exception: Whatever the Flux client raises if the fallback also
	            fails; it is logged with its traceback and re-raised.
	    """
	    logger.info("Opening image for update...")

	    try:
	        image = Image.open(BytesIO(image_bytes))
	    except Exception as exc:
	        # Chain the cause so the real decoding problem stays in the traceback.
	        raise ValueError("Invalid image data. Upload a valid image file.") from exc

	    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])

	    try:
	        response = genai_client.models.generate_content(
	            model="gemini-2.5-flash-image",
	            contents=[text_instruction, image],
	            config=generation_config
	        )

	        updated_text, updated_image_bytes = None, None
	        for part in response.candidates[0].content.parts:
	            if part.text:
	                updated_text = part.text
	            elif getattr(part, "inline_data", None):
	                updated_image_bytes = BytesIO(part.inline_data.data)

	        # Only claim success once an image is actually in hand.
	        if updated_image_bytes is not None:
	            logger.info("Gemini image update successful.")
	        else:
	            logger.warning("Gemini image update returned no image data.")

	        return updated_text, updated_image_bytes

	    except Exception as e:
	        logger.warning("Gemini update failed: %s. Falling back to Flux edit...", e)

	        try:
	            # NOTE(review): this call passes only the instruction text, not
	            # the uploaded image, so the fallback result is a newly
	            # generated image rather than an edit of the input. Consider an
	            # image-to-image capable client/model here — TODO confirm what
	            # flux_client supports.
	            flux_image = flux_client.text_to_image(
	                f"Edit image based on instruction: {text_instruction}",
	                model="black-forest-labs/FLUX.1-dev"
	            )
	            buf = BytesIO()
	            flux_image.save(buf, format="PNG")
	            buf.seek(0)  # rewind so callers can read from the start
	            return None, buf
	        except Exception as flux_error:
	            logger.error("Flux fallback failed: %s", flux_error, exc_info=True)
	            raise


	# ===============================================================
	# 🔹 VIRTUAL TRY-ON (DRESS + PERSON + SHOES)
	# ===============================================================
	def _parse_try_on_summary(text: str) -> Dict[str, Any]:
	    """Turn the model's text part into a summary dict, tolerating code fences."""
	    candidate = text.strip()
	    if candidate.startswith("```"):
	        # Drop a Markdown fence such as ```json ... ``` around the payload.
	        candidate = candidate.strip("`").strip()
	        if candidate[:4].lower() == "json":
	            candidate = candidate[4:].strip()

	    try:
	        parsed = json.loads(candidate)
	    except json.JSONDecodeError:
	        return {"success": False, "notes": text}

	    # Valid JSON that is not an object (e.g. `true`, a list) cannot serve as
	    # the summary dict; report it the same way as unparseable text.
	    if not isinstance(parsed, dict):
	        return {"success": False, "notes": text}
	    return parsed


	def virtual_try_on(
	    dress_image_bytes: bytes,
	    person_image_bytes: bytes,
	    shoes_image_bytes: Optional[bytes] = None
	) -> tuple[Dict[str, Any], Optional[BytesIO]]:
	    """Perform a virtual try-on with Gemini (shoes image is optional).

	    The model receives ``VIRTUAL_TRY_ON_PROMPT`` followed by the dress
	    image, the person image and, when provided, the shoes image.

	    Args:
	        dress_image_bytes: Raw bytes of the garment reference image.
	        person_image_bytes: Raw bytes of the target person image.
	        shoes_image_bytes: Raw bytes of the footwear reference image, or
	            ``None``/empty to omit it.

	    Returns:
	        A ``(summary, image)`` tuple. ``summary`` is the JSON object parsed
	        from the model's text part; if the text is not a JSON object it is
	        ``{"success": False, "notes": <text>}``, and if there is no text
	        part it is ``{"success": False, "notes": "No text response."}``.
	        ``image`` is a ``BytesIO`` with the returned image bytes, or
	        ``None`` if the model sent no image part.

	    Raises:
	        ValueError: If any supplied image cannot be opened. The underlying
	            error is attached as ``__cause__``.
	        HTTPException: With status 500 if the model call or the handling of
	            its response fails.
	    """
	    logger.info("Opening images for virtual try-on...")

	    try:
	        dress_image = Image.open(BytesIO(dress_image_bytes))
	        person_image = Image.open(BytesIO(person_image_bytes))

	        # Prepare content list with mandatory items
	        contents = [VIRTUAL_TRY_ON_PROMPT, dress_image, person_image]

	        # Handle optional shoes
	        if shoes_image_bytes:
	            contents.append(Image.open(BytesIO(shoes_image_bytes)))  # Input 3
	        # NOTE(review): without shoes the prompt still describes an
	        # "Input 3"; a text note telling the model to ignore it could be
	        # appended here. Left as-is to keep the request unchanged.

	    except Exception as exc:
	        raise ValueError("Invalid image data provided.") from exc

	    try:
	        config = types.GenerateContentConfig(response_modalities=["Text", "Image"])

	        # Make the API call with all collected contents
	        response = genai_client1.models.generate_content(
	            model="gemini-2.5-flash-image",  # Ensure this model supports image gen
	            contents=contents,
	            config=config
	        )

	        result_summary = {"success": False, "notes": "No text response."}
	        result_image_bytes = None

	        for part in response.candidates[0].content.parts:
	            if part.text:
	                result_summary = _parse_try_on_summary(part.text)
	            elif getattr(part, "inline_data", None):
	                result_image_bytes = BytesIO(part.inline_data.data)

	        # Only claim success once an image is actually in hand.
	        if result_image_bytes is not None:
	            logger.info("Gemini virtual try-on successful.")
	        else:
	            logger.warning("Gemini virtual try-on returned no image data.")

	        return result_summary, result_image_bytes

	    except Exception as e:
	        # This surfaces to the client as a 500, so record the traceback.
	        logger.error("Gemini try-on failed: %s", e, exc_info=True)
	        raise HTTPException(status_code=500, detail=f"Model generation failed: {str(e)}") from e



	# ===============================================================
	# 🔹 SHOE IMAGE GENERATION (UPDATED FOR SINGLE PROMPT)
	# ===============================================================
	def generate_shoe_images(prompt: str) -> Tuple[Optional[str], List[BytesIO]]:
	    """Generate photorealistic shoe images from a raw user prompt.

	    The user's description is wrapped in a fixed product-photo prompt and
	    sent to Gemini. Flux is used as a fallback when the Gemini call raises
	    *and* when Gemini replies without any image part, so an empty image
	    list is no longer returned while a fallback is still available.

	    Args:
	        prompt: The user's description of the shoe.

	    Returns:
	        A ``(text, images)`` tuple. From Gemini, ``text`` is the last text
	        part returned (or ``None``) and ``images`` holds one ``BytesIO`` per
	        image part. From the Flux fallback, ``text`` is ``None`` and
	        ``images`` holds a single PNG-encoded ``BytesIO`` rewound to
	        position 0.

	    Raises:
	        Exception: Whatever the Flux client raises if the fallback also
	            fails; it is logged with its traceback and re-raised.
	    """
	    logger.info("Generating shoe images from prompt...")

	    # Wrap the prompt to ensure white background and high quality
	    final_prompt = f"""
	Produce a photorealistic product image of a shoe. with white background

	User Description: {prompt}

	Requirements:
	- Background: solid plain white / studio background.
	- Quality: High detail, 8k resolution, realistic textures, clean studio lighting.
	- Focus: The entire shoe must be visible.
	"""

	    config = types.GenerateContentConfig(response_modalities=["Text", "Image"])

	    try:
	        # Try Gemini First
	        response = genai_client.models.generate_content(
	            model="gemini-2.5-flash-image",
	            contents=final_prompt,
	            config=config
	        )

	        generated_text, images = None, []

	        for part in response.candidates[0].content.parts:
	            if part.text:
	                generated_text = part.text
	            elif getattr(part, "inline_data", None):
	                images.append(BytesIO(part.inline_data.data))
	    except Exception as e:
	        logger.warning("Gemini shoe generation failed: %s. Falling back to Flux...", e)
	    else:
	        if images:
	            return generated_text, images
	        # The call succeeded but carried no image: fall back rather than
	        # hand the caller an empty list.
	        logger.warning("Gemini returned no shoe image data. Falling back to Flux...")

	    try:
	        # Fallback to Flux
	        flux_image = flux_client.text_to_image(
	            final_prompt,
	            model="black-forest-labs/FLUX.1-dev"
	        )
	        buf = BytesIO()
	        flux_image.save(buf, format="PNG")
	        buf.seek(0)  # rewind so callers can read from the start
	        return None, [buf]
	    except Exception as flux_error:
	        logger.error("Flux fallback failed: %s", flux_error, exc_info=True)
	        raise