Spaces:

Hammad712
/

Virtual-try-on

Runtime error

App Files Files Community

Hammad712 committed on Dec 10, 2025

Commit

0c779fa

verified ·

1 Parent(s): bbabac5

Update app/services/image_service.py

Browse files

Files changed (1) hide show

app/services/image_service.py +38 -28

app/services/image_service.py CHANGED Viewed

@@ -12,27 +12,28 @@ from app.core.clients import llm, prompt_template, genai_client, flux_client, ge
 logger = logging.getLogger(__name__)
-# --- Virtual Try-On Instruction Template ---
 VIRTUAL_TRY_ON_PROMPT = """
-You are an expert image-editing agent. Perform a high-fidelity virtual try-on using two inputs:
 - Input 1 (dress reference): the exact garment image to be transferred.
 - Input 2 (person): the target person who must wear the garment.
 Positive instructions (Do):
-1. Produce exactly one photorealistic output image showing the person wearing the dress from Input 1.
 2. Preserve the person's face, facial features, expression, hair, gender, skin tone, and body proportions — do NOT change identity.
-3. Reproduce the dress design, color, pattern, fabric texture, and major details (buttons, seams, prints) faithfully — do NOT change these.
-4. Match lighting, shadows, perspective, and scale so the garment appears naturally worn by the person.
-5. If minor geometric adjustments are required to account for pose, make them subtle and keep dress details intact.
 6. Also return a short text JSON summary with the form: {"success": true|false, "notes": "explain any limitations or changes"}.
 Negative instructions (Do NOT):
 1. Do NOT modify the person's face, gender, or identifying features.
-2. Do NOT change the dress color, pattern, or main texture.
 3. Do NOT add extra clothing items, logos, watermarks, offensive symbols, or unrelated props.
 4. Do NOT produce empty responses, placeholders, or images containing text overlays.
-5. Do NOT blur, heavily distort, or crop important parts of the person or dress.
 Output requirements:
 - Return a single photorealistic image (same orientation as the person image) and a short text JSON summary.
@@ -153,23 +154,43 @@ def update_image_with_text(text_instruction: str, image_bytes: bytes) -> tuple[O
 # ===============================================================
-# 🔹 VIRTUAL TRY-ON (DRESS + PERSON)
 # ===============================================================
-def virtual_try_on(dress_image_bytes: bytes, person_image_bytes: bytes) -> tuple[Dict[str, Any], Optional[BytesIO]]:
-    """Perform virtual try-on with Gemini, fallback to Flux if fails."""
     logger.info("Opening images for virtual try-on...")
     try:
         dress_image = Image.open(BytesIO(dress_image_bytes))
         person_image = Image.open(BytesIO(person_image_bytes))
     except Exception:
         raise ValueError("Invalid image data provided.")
     try:
         config = types.GenerateContentConfig(response_modalities=["Text", "Image"])
         response = genai_client1.models.generate_content(
-            model="gemini-2.5-flash-image",
-            contents=[VIRTUAL_TRY_ON_PROMPT, dress_image, person_image],
             config=config
         )
@@ -190,23 +211,12 @@ def virtual_try_on(dress_image_bytes: bytes, person_image_bytes: bytes) -> tuple
         return result_summary, result_image_bytes
     except Exception as e:
-        logger.warning(f"Gemini try-on failed: {e}. Falling back to Flux...")
-        # try:
-        #     prompt = "Photorealistic virtual try-on of a person wearing the given dress."
-        #     image = flux_client.text_to_image(
-        #         prompt,
-        #         model="black-forest-labs/FLUX.1-dev"
-        #     )
-        #     buf = BytesIO()
-        #     image.save(buf, format="PNG")
-        #     buf.seek(0)
-        #     return {"success": True, "notes": "Generated using Flux fallback."}, buf
-        # except Exception as flux_error:
-        #     logger.error(f"Flux fallback failed: {flux_error}", exc_info=True)
-        #     raise
 # ===============================================================
 # 🔹 SHOE IMAGE GENERATION
 # ===============================================================

 logger = logging.getLogger(__name__)
+# --- Virtual Try-On Instruction Template (Updated for 3 Inputs) ---
 VIRTUAL_TRY_ON_PROMPT = """
+You are an expert image-editing agent. Perform a high-fidelity virtual try-on using three inputs:
 - Input 1 (dress reference): the exact garment image to be transferred.
 - Input 2 (person): the target person who must wear the garment.
+- Input 3 (shoes reference): the footwear to be worn.
 Positive instructions (Do):
+1. Produce exactly one photorealistic output image showing the person from Input 2 wearing BOTH the dress from Input 1 and the shoes from Input 3.
 2. Preserve the person's face, facial features, expression, hair, gender, skin tone, and body proportions — do NOT change identity.
+3. Reproduce the dress design, color, pattern, fabric texture, and major details (buttons, seams, prints) faithfully.
+4. Reproduce the shoes faithfully and ensure they fit the person's stance naturally (grounding, shadows).
+5. Match lighting, shadows, perspective, and scale so all items appear naturally worn by the person in the original scene.
 6. Also return a short text JSON summary with the form: {"success": true|false, "notes": "explain any limitations or changes"}.
 Negative instructions (Do NOT):
 1. Do NOT modify the person's face, gender, or identifying features.
+2. Do NOT change the dress/shoe color, pattern, or main texture.
 3. Do NOT add extra clothing items, logos, watermarks, offensive symbols, or unrelated props.
 4. Do NOT produce empty responses, placeholders, or images containing text overlays.
+5. Do NOT blur, heavily distort, or crop important parts of the person, dress, or shoes.
 Output requirements:
 - Return a single photorealistic image (same orientation as the person image) and a short text JSON summary.
 # ===============================================================
+# 🔹 VIRTUAL TRY-ON (DRESS + PERSON + SHOES)
 # ===============================================================
+def virtual_try_on(
+    dress_image_bytes: bytes,
+    person_image_bytes: bytes,
+    shoes_image_bytes: Optional[bytes] = None
+) -> tuple[Dict[str, Any], Optional[BytesIO]]:
+    """Perform virtual try-on with Gemini (Supports optional shoes)."""
     logger.info("Opening images for virtual try-on...")
     try:
         dress_image = Image.open(BytesIO(dress_image_bytes))
         person_image = Image.open(BytesIO(person_image_bytes))
+        # Prepare content list with mandatory items
+        contents = [VIRTUAL_TRY_ON_PROMPT, dress_image, person_image]
+        # Handle optional shoes
+        if shoes_image_bytes:
+            shoes_image = Image.open(BytesIO(shoes_image_bytes))
+            contents.append(shoes_image) # Appends as Input 3
+        else:
+            # If no shoes provided, you might want to append a text note
+            # telling the model to ignore Input 3 instructions,
+            # or simply rely on the model's flexibility.
+            pass
     except Exception:
         raise ValueError("Invalid image data provided.")
     try:
         config = types.GenerateContentConfig(response_modalities=["Text", "Image"])
+        # Make the API call with all collected contents
         response = genai_client1.models.generate_content(
+            model="gemini-2.0-flash-exp", # Ensure this model supports image gen
+            contents=contents,
             config=config
         )
         return result_summary, result_image_bytes
     except Exception as e:
+        logger.warning(f"Gemini try-on failed: {e}")
+        # Re-raise or handle fallback logic here
+        raise HTTPException(status_code=500, detail=f"Model generation failed: {str(e)}")
 # ===============================================================
 # 🔹 SHOE IMAGE GENERATION
 # ===============================================================