VTON_Clothes

Sleeping

App Files Files Community

Maarij-Aqeel committed on Mar 3

Commit

bd3e71e

0 Parent(s):

Added files

Browse files

Files changed (3) hide show

.gitignore +2 -0
app.py +208 -0
requirements.txt +11 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .gradio
2	+ .venv

app.py ADDED Viewed

	@@ -0,0 +1,208 @@

+import gradio as gr
+import torch
+import numpy as np
+import cv2
+from PIL import Image
+import rembg
+from diffusers import AutoPipelineForInpainting
+# ==============================================================================
+# Virtual Try-On (VTON) Application
+# Designed using PyTorch, Diffusers, and Gradio
+#
+# This script serves as the complete application. It handles model loading,
+# image preprocessing, mask generation (via rembg/OpenCV), and inference.
+# ==============================================================================
+# Hardware setup - preferring CUDA if available with fp16 to optimize VRAM
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
+class VTONPipeline:
+    def __init__(self):
+        self.pipe = None
+        self.rembg_session = None
+    def load_models(self):
+        """
+        Loads the necessary generative pipelines and segmentation tools.
+        Loads using torch.float16 for significant VRAM savings.
+        """
+        print(f"Loading Diffusers Model on {DEVICE}...")
+        try:
+            # NOTE: For state-of-the-art IDM-VTON or OOTDiffusion, you would usually load
+            # their specialized UNets and IP-Adapters here.
+            # We are using an SDXL Inpainting baseline to demonstrate the full logic.
+            self.pipe = AutoPipelineForInpainting.from_pretrained(
+                "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
+                torch_dtype=DTYPE,
+                variant="fp16" if DTYPE == torch.float16 else None
+            ).to(DEVICE)
+            # Accelerate and VRAM optimizations
+            if DEVICE == "cuda":
+                self.pipe.enable_model_cpu_offload()
+                # Uncomment the below if xformers is successfully installed
+                # self.pipe.enable_xformers_memory_efficient_attention()
+        except Exception as e:
+            print(f"Failed to load the pipeline: {e}")
+            raise e
+        # Initialize rembg session for background/body mask generation
+        self.rembg_session = rembg.new_session()
+        print("Models loaded successfully.")
+    def generate_mask(self, person_image: Image.Image) -> Image.Image:
+        """
+        [CRUCIAL] Generates a mask indicating where the new garment will be drawn.
+        Uses rembg to isolate the person and morphological operations to create the inpaint mask.
+        """
+        # 1. Use rembg to extract the person from the background
+        person_no_bg = rembg.remove(person_image, session=self.rembg_session)
+        np_img = np.array(person_no_bg)
+        # 2. Extract the alpha channel (0 = background, 255 = person)
+        alpha = np_img[:, :, 3]
+        # 3. Create a mask over the clothing area. In a pure VTON application,
+        # a human parser (like Graphonomy or MediaPipe pose) is used to pinpoint the shirt.
+        # Here we approximate by capturing the body silhouette and dilating it slightly
+        # to ensure the garment bounds are fully covered for inpainting.
+        kernel = np.ones((25, 25), np.uint8)
+        dilated_mask = cv2.dilate(alpha, kernel, iterations=1)
+        # 4. Convert back to PIL Image, grayscale ('L')
+        mask_image = Image.fromarray(dilated_mask).convert("L")
+        return mask_image
+    def preprocess_image(self, image: Image.Image, target_size=(768, 1024)) -> Image.Image:
+        """
+        Resizes and center-crops the input image to fit the specific resolution
+        required by diffusion models (usually 768x1024 for VTON like IDM-VTON).
+        """
+        w, h = image.size
+        target_w, target_h = target_size
+        # Calculate aspect ratio
+        ratio = max(target_w / w, target_h / h)
+        new_w, new_h = int(w * ratio), int(h * ratio)
+        # Resize using high-quality Lanczos filter
+        img_resized = image.resize((new_w, new_h), Image.LANCZOS)
+        # Center crop precisely
+        left = (new_w - target_w) / 2
+        top = (new_h - target_h) / 2
+        right = (new_w + target_w) / 2
+        bottom = (new_h + target_h) / 2
+        return img_resized.crop((left, top, right, bottom))
+    def try_on(self, person_img: Image.Image, garment_img: Image.Image) -> Image.Image:
+        """
+        Core inference pipeline.
+        Executes the heavy lifting: preprocessing -> masking -> diffusion loop.
+        """
+        if person_img is None or garment_img is None:
+            raise gr.Error("Both 'Target Person' and 'Garment' images are required.")
+        # Lazy load models to speed up initial app startup
+        if self.pipe is None:
+            self.load_models()
+        print("Preprocessing Inputs...")
+        # Most VTON networks operate optimally at 768 Width x 1024 Height
+        target_resolution = (768, 1024)
+        person_prepared = self.preprocess_image(person_img.convert("RGB"), target_resolution)
+        garment_prepared = self.preprocess_image(garment_img.convert("RGB"), target_resolution)
+        print("Generating Mask...")
+        mask_prepared = self.generate_mask(person_prepared)
+        print("Running Inpainting Inference...")
+        # Prompting: Describe exactly what we want.
+        # Note: Advanced architectures like IDM-VTON will use IP-Adapter
+        # to embed the garment_img directly as semantic features.
+        prompt = "photorealistic, a person wearing the provided garment, perfect fit, detailed fabric texture, high quality, 8k"
+        negative_prompt = "deformed, ugly, bad anatomy, bad lighting, blurry, low resolution, artifacts, extra limbs"
+        # Perform Inference
+        result_img = self.pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            image=person_prepared,
+            mask_image=mask_prepared,
+            num_inference_steps=30,      # Balance between speed and quality
+            guidance_scale=7.5,
+        ).images[0]
+        return result_img
+# Module-level worker used by run_vton_interface; creating it is cheap because
+# the models are only loaded lazily on the first try_on call.
+vton_worker = VTONPipeline()
+def run_vton_interface(person_img, garment_img):
+    """
+    Gradio wrapper function to capture output and gracefully catch errors.
+    """
+    try:
+        # Pass to backend processor
+        output = vton_worker.try_on(person_img, garment_img)
+        return output
+    except gr.Error as ge:
+        # Standard gradio errors shown instantly to users
+        raise ge
+    except Exception as e:
+        # Unexpected errors logged and displayed appropriately
+        print(f"Exception during Try-On: {e}")
+        raise gr.Error(f"Model Inference Failed: {str(e)}")
+# ==============================================================================
+# Gradio Web UI
+# ==============================================================================
+def create_ui():
+    """
+    Constructs an aesthetically pleasing user interface utilizing Gradio.
+    """
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", font=[gr.themes.GoogleFont("Inter")])) as demo:
+        gr.Markdown(
+            """
+            <div style="text-align: center; margin-bottom: 20px;">
+                <h1>👗 AI Virtual Try-On (VTON) Studio</h1>
+                <p>Upload a photo of a person and an isolated clothing garment to digitally try it on using Generative AI.</p>
+            </div>
+            """
+        )
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### 1. Upload Inputs")
+                person_input = gr.Image(type="pil", label="👤 Target Person", height=450)
+                garment_input = gr.Image(type="pil", label="👕 Garment to Try", height=450)
+                generate_btn = gr.Button("✨ Generate Try-On", variant="primary", size="lg")
+            with gr.Column(scale=1):
+                gr.Markdown("### 2. Resulting Image")
+                output_image = gr.Image(type="pil", label="Expected Result", interactive=False, height=930)
+        # Connect front-end inputs to back-end function
+        # A loading spinner is automatically displayed on `generate_btn` click.
+        generate_btn.click(
+            fn=run_vton_interface,
+            inputs=[person_input, garment_input],
+            outputs=[output_image],
+            api_name="generate"
+        )
+    return demo
+if __name__ == "__main__":
+    app = create_ui()
+    # Queue is required to handle concurrent tasks safely
+    app.queue()
+    app.launch(server_name="0.0.0.0", server_port=7860, share=False)

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+torch>=2.0.0
+torchvision
+diffusers>=0.28.0
+transformers
+accelerate
+gradio
+rembg
+pillow
+opencv-python
+numpy
+xformers