# NOTE: recovered from a Hugging Face Spaces page capture; the
# "Spaces: Runtime error" status banner was page chrome, not code.
import base64
import json
import logging
from io import BytesIO
from typing import Optional, Dict, Any, Tuple, List

from fastapi import HTTPException
from google.genai import types
from PIL import Image

# Import initialized clients (ensure flux_client is defined in app/core/clients.py)
from app.core.clients import llm, prompt_template, genai_client, flux_client, genai_client1
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)

# --- Virtual Try-On Instruction Template (Updated for 3 Inputs) ---
# System prompt sent to Gemini alongside the dress, person, and (optional)
# shoe images in virtual_try_on(). It mandates an image output on a plain
# white background plus a short JSON status summary the caller parses.
VIRTUAL_TRY_ON_PROMPT = """
You are an expert image synthesis agent. Your PRIMARY and MANDATORY directive is to generate a high-fidelity virtual try-on image using the three provided inputs against a clean background.
- Input 1 (dress reference): the garment to be transferred.
- Input 2 (person): the target person.
- Input 3 (shoes reference): the footwear.
CRITICAL: You MUST generate an image output in every single response. Do not refuse to generate an image. Make your best attempt at a photorealistic composition.
Positive instructions (Do):
1. Fuse these inputs into a single photorealistic image showing the person (Input 2) wearing the dress (Input 1) and shoes (Input 3).
2. **Set the entire background to solid, seamless, plain white.**
3. Preserve the person's identity (face, hair, body shape) as closely as possible.
4. Retain the exact textures and details of the dress and shoes.
5. Ensure the lighting is bright, clean, studio-quality light that complements the white background.
6. Return a short JSON summary alongside the image.
Negative instructions (Do NOT):
1. Do NOT return only text. An image is required.
2. **Do NOT retain any background elements, scenery, walls, or furniture from the original inputs. The background must be completely empty.**
3. Do NOT change the person's face or gender.
4. Do NOT add watermarks, text overlays, or multiple angles (collages).
5. Do NOT alter the core design of the dress or shoes.
Output requirements:
- MANDATORY: 1 Photorealistic Image file isolated on a solid white background.
- Secondary: JSON text summary: {"success": true, "notes": "Image generated successfully on white background"}.
"""
# ===============================================================
# 🔹 PROMPT ENHANCEMENT
# ===============================================================
def enhance_user_prompt(raw_prompt: str) -> str:
    """Enhance a raw user prompt using the configured LLM.

    Args:
        raw_prompt: The user's original, unrefined prompt text.

    Returns:
        The enhanced prompt text produced by the LLM.

    Raises:
        Exception: Re-raises any error from template formatting or the
            LLM call after logging it with a traceback.
    """
    logger.info("Enhancing prompt: %s...", raw_prompt[:50])
    try:
        # Template formatting can fail too (e.g. bad template variables),
        # so it belongs inside the try block alongside the LLM call —
        # previously such failures propagated unlogged.
        formatted_prompt = prompt_template.invoke({"Raw_Prompt": raw_prompt})
        response = llm.invoke(formatted_prompt)
    except Exception as e:
        logger.error("Error during prompt enhancement: %s", e, exc_info=True)
        raise
    logger.info("Prompt enhancement successful.")
    return response.content
# ===============================================================
# 🔹 IMAGE GENERATION (TEXT → IMAGE) WITH FALLBACK
# ===============================================================
def generate_image_from_text(image_prompt: str) -> tuple[Optional[str], Optional[BytesIO]]:
    """Generate an image from a text prompt with Gemini → Flux fallback.

    Args:
        image_prompt: Natural-language description of the desired image.

    Returns:
        Tuple of (generated text, PNG/image bytes buffer). Text is always
        None on the Flux path. A text-only Gemini reply (no image part)
        now also triggers the Flux fallback instead of returning
        (text, None).

    Raises:
        Exception: Propagates the Flux error if the fallback also fails.
    """
    logger.info("Generating image with prompt: %s...", image_prompt[:50])
    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])
    try:
        response = genai_client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=image_prompt,
            config=generation_config
        )
        generated_text, generated_image_bytes = None, None
        for part in response.candidates[0].content.parts:
            if part.text:
                generated_text = part.text
            elif getattr(part, "inline_data", None):
                generated_image_bytes = BytesIO(part.inline_data.data)
        # Callers need an image: treat a text-only reply as a failure so
        # the Flux fallback below gets a chance to produce one.
        if generated_image_bytes is None:
            raise RuntimeError("Gemini returned no image part.")
        logger.info("Gemini image generation successful.")
        return generated_text, generated_image_bytes
    except Exception as e:
        logger.warning("Gemini image generation failed: %s. Falling back to Flux...", e)
    try:
        image = flux_client.text_to_image(
            image_prompt,
            model="black-forest-labs/FLUX.1-dev"
        )
        buf = BytesIO()
        image.save(buf, format="PNG")
        buf.seek(0)
        return None, buf
    except Exception as flux_error:
        logger.error("Flux fallback failed: %s", flux_error, exc_info=True)
        raise
# ===============================================================
# 🔹 IMAGE UPDATE (EDIT EXISTING IMAGE WITH TEXT)
# ===============================================================
def update_image_with_text(text_instruction: str, image_bytes: bytes) -> tuple[Optional[str], Optional[BytesIO]]:
    """Update an existing image using Gemini, falling back to Flux.

    Args:
        text_instruction: Natural-language description of the edit.
        image_bytes: Raw bytes of the image to edit.

    Returns:
        Tuple of (response text, edited image buffer). Text is None on
        the Flux path; the buffer is None if Gemini returned no image
        part.

    Raises:
        ValueError: If image_bytes cannot be decoded as an image.
        Exception: Propagates the Flux error if the fallback also fails.
    """
    logger.info("Opening image for update...")
    try:
        source_image = Image.open(BytesIO(image_bytes))
    except Exception as exc:
        # Chain the PIL error so the root cause stays visible upstream.
        raise ValueError("Invalid image data. Upload a valid image file.") from exc
    generation_config = types.GenerateContentConfig(response_modalities=['Text', 'Image'])
    try:
        response = genai_client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[text_instruction, source_image],
            config=generation_config
        )
        logger.info("Gemini image update successful.")
        updated_text, updated_image_bytes = None, None
        for part in response.candidates[0].content.parts:
            if part.text:
                updated_text = part.text
            elif getattr(part, "inline_data", None):
                updated_image_bytes = BytesIO(part.inline_data.data)
        return updated_text, updated_image_bytes
    except Exception as e:
        logger.warning("Gemini update failed: %s. Falling back to Flux edit...", e)
    # NOTE(review): the fallback is text-to-image only — the original
    # image is never sent to Flux, so this "edit" is a regeneration from
    # the instruction text alone. Confirm whether flux_client offers an
    # image-to-image endpoint.
    try:
        flux_image = flux_client.text_to_image(
            f"Edit image based on instruction: {text_instruction}",
            model="black-forest-labs/FLUX.1-dev"
        )
        buf = BytesIO()
        flux_image.save(buf, format="PNG")
        buf.seek(0)
        return None, buf
    except Exception as flux_error:
        logger.error("Flux fallback failed: %s", flux_error, exc_info=True)
        raise
# ===============================================================
# 🔹 VIRTUAL TRY-ON (DRESS + PERSON + SHOES)
# ===============================================================
def virtual_try_on(
    dress_image_bytes: bytes,
    person_image_bytes: bytes,
    shoes_image_bytes: Optional[bytes] = None
) -> tuple[Dict[str, Any], Optional[BytesIO]]:
    """Perform a virtual try-on composition with Gemini (shoes optional).

    Args:
        dress_image_bytes: Raw bytes of the garment reference image.
        person_image_bytes: Raw bytes of the target person image.
        shoes_image_bytes: Optional raw bytes of the footwear reference.

    Returns:
        Tuple of (summary dict, image buffer). The summary is the model's
        JSON status when parseable, otherwise a dict wrapping the raw
        text; the buffer is None if no image part was returned.

    Raises:
        ValueError: If any provided image bytes cannot be decoded.
        HTTPException: 500 if the Gemini call fails.
    """
    logger.info("Opening images for virtual try-on...")
    try:
        dress_image = Image.open(BytesIO(dress_image_bytes))
        person_image = Image.open(BytesIO(person_image_bytes))
        # Mandatory content: prompt, dress (Input 1), person (Input 2).
        contents = [VIRTUAL_TRY_ON_PROMPT, dress_image, person_image]
        # Shoes are optional; when present they become Input 3. When
        # absent we rely on the model tolerating a missing third input.
        if shoes_image_bytes:
            contents.append(Image.open(BytesIO(shoes_image_bytes)))
    except Exception as exc:
        # Chain the PIL error so the root cause stays visible upstream.
        raise ValueError("Invalid image data provided.") from exc
    try:
        config = types.GenerateContentConfig(response_modalities=["Text", "Image"])
        response = genai_client1.models.generate_content(
            model="gemini-2.5-flash-image",  # must support image generation
            contents=contents,
            config=config
        )
        logger.info("Gemini virtual try-on successful.")
        result_summary: Dict[str, Any] = {"success": False, "notes": "No text response."}
        result_image_bytes = None
        for part in response.candidates[0].content.parts:
            if part.text:
                try:
                    result_summary = json.loads(part.text)
                except json.JSONDecodeError:
                    # Model returned prose instead of JSON; surface it as-is.
                    result_summary = {"success": False, "notes": part.text}
            elif getattr(part, "inline_data", None):
                result_image_bytes = BytesIO(part.inline_data.data)
        return result_summary, result_image_bytes
    except Exception as e:
        logger.warning("Gemini try-on failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Model generation failed: {str(e)}")
# ===============================================================
# 🔹 SHOE IMAGE GENERATION (UPDATED FOR SINGLE PROMPT)
# ===============================================================
def generate_shoe_images(prompt: str) -> Tuple[Optional[str], List[BytesIO]]:
    """Generate photorealistic shoe product images from a raw user prompt.

    Args:
        prompt: Free-form user description of the shoe.

    Returns:
        Tuple of (response text, list of image buffers). Text is None on
        the Flux path; the Flux fallback yields exactly one image.

    Raises:
        Exception: Propagates the Flux error if the fallback also fails.
    """
    logger.info("Generating shoe images from prompt...")
    # Wrap the user prompt with fixed quality/background requirements so
    # both providers receive the same constraints.
    final_prompt = f"""
Produce a photorealistic product image of a shoe. with white background
User Description: {prompt}
Requirements:
- Background: solid plain white / studio background.
- Quality: High detail, 8k resolution, realistic textures, clean studio lighting.
- Focus: The entire shoe must be visible.
"""
    config = types.GenerateContentConfig(response_modalities=["Text", "Image"])
    try:
        response = genai_client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=final_prompt,
            config=config
        )
        # Success log added for consistency with the sibling generators.
        logger.info("Gemini shoe generation successful.")
        generated_text, images = None, []
        for part in response.candidates[0].content.parts:
            if part.text:
                generated_text = part.text
            elif getattr(part, "inline_data", None):
                images.append(BytesIO(part.inline_data.data))
        return generated_text, images
    except Exception as e:
        logger.warning("Gemini shoe generation failed: %s. Falling back to Flux...", e)
    try:
        flux_image = flux_client.text_to_image(
            final_prompt,
            model="black-forest-labs/FLUX.1-dev"
        )
        buf = BytesIO()
        flux_image.save(buf, format="PNG")
        buf.seek(0)
        return None, [buf]
    except Exception as flux_error:
        logger.error("Flux fallback failed: %s", flux_error, exc_info=True)
        raise