Spaces:

beertoshi
/

Unthotifai

Sleeping

App Files Files Community

beertoshi committed on May 27, 2025

Commit

5435d62

verified ·

1 Parent(s): 3af7399

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -160

app.py CHANGED Viewed

@@ -1,203 +1,149 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionInpaintPipeline
 from PIL import Image, ImageDraw, ImageFilter
 import numpy as np
 import spaces
-# Load model
 pipe = StableDiffusionInpaintPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-2-inpainting",
     torch_dtype=torch.float16,
     safety_checker=None,
     requires_safety_checker=False
 )
 pipe.enable_attention_slicing()
-CLOTHES = {
-    "Indian Sari": "woman wearing beautiful red and gold sari, traditional Indian dress, high quality photo",
-    "Japanese Kimono": "person wearing elegant kimono, traditional Japanese clothing, professional photo",
-    "African Dashiki": "person wearing colorful dashiki, traditional African clothing, detailed",
-    "Chinese Qipao": "woman wearing elegant qipao dress, traditional Chinese clothing",
-    "Scottish Kilt": "man wearing Scottish kilt, traditional highland dress",
-    "Middle Eastern Thobe": "person wearing white thobe, traditional Middle Eastern clothing"
 }
-def make_divisible_by_8(width, height):
-    """Ensure dimensions are divisible by 8"""
-    return width - (width % 8), height - (height % 8)
-def create_body_mask(image_size):
-    """Create mask for body area only"""
-    width, height = image_size
     mask = Image.new('L', (width, height), 0)
     draw = ImageDraw.Draw(mask)
-    # Body area (avoiding face)
-    top = height * 0.35  # Start below face
-    left = width * 0.1
-    right = width * 0.9
-    bottom = height * 0.98
-    draw.ellipse([left, top, right, bottom], fill=255)
-    mask = mask.filter(ImageFilter.GaussianBlur(radius=25))
     return mask
-@spaces.GPU(duration=90)
-def generate_clothing(input_image, clothing_type, quality_mode="balanced"):
     if input_image is None:
         return None, "Please upload an image"
     try:
-        # Move to GPU
         pipe.to("cuda")
-        # Convert to PIL
         if isinstance(input_image, np.ndarray):
             image = Image.fromarray(input_image).convert("RGB")
         else:
-            image = input_image.convert("RGB")
-        # Store original size
         original_size = image.size
-        # Quality settings
-        quality_settings = {
-            "fast": {"size": 512, "steps": 25},
-            "balanced": {"size": 768, "steps": 40},
-            "ultra": {"size": 1024, "steps": 60}
-        }
-        settings = quality_settings[quality_mode]
-        target_size = settings["size"]
-        # Calculate new size maintaining aspect ratio
-        if max(image.size) > target_size:
-            scale = target_size / max(image.size)
-            new_width = int(image.width * scale)
-            new_height = int(image.height * scale)
-        else:
-            new_width = image.width
-            new_height = image.height
-        # Make divisible by 8
-        new_width, new_height = make_divisible_by_8(new_width, new_height)
-        # Ensure minimum size
-        new_width = max(new_width, 64)
-        new_height = max(new_height, 64)
-        # Resize all images to the same size
-        working_size = (new_width, new_height)
-        image_resized = image.resize(working_size, Image.Resampling.LANCZOS)
-        # Create mask at the same size
-        mask = create_body_mask(working_size)
-        print(f"Processing at size: {working_size}")
-        # Generate
-        prompt = CLOTHES[clothing_type] + ", professional photography, preserve facial features"
-        negative_prompt = "blurry, low quality, distorted face, bad anatomy"
-        with torch.autocast("cuda"):
-            result = pipe(
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                image=image_resized,
-                mask_image=mask,
-                num_inference_steps=settings["steps"],
-                guidance_scale=7.5,
-                strength=0.85
-            ).images[0]
-        # Ensure result is the same size (it should be, but just in case)
-        if result.size != working_size:
-            result = result.resize(working_size, Image.Resampling.LANCZOS)
-        # Blend with original to preserve face
-        # Create smooth blend mask
-        blend_mask = mask.filter(ImageFilter.GaussianBlur(radius=40))
-        # All images must be the same size for composite
-        assert image_resized.size == result.size == blend_mask.size, f"Size mismatch: {image_resized.size}, {result.size}, {blend_mask.size}"
-        # Blend
-        final = Image.composite(result, image_resized, blend_mask)
-        # Resize back to original size
         if final.size != original_size:
             final = final.resize(original_size, Image.Resampling.LANCZOS)
-        # Cleanup
         pipe.to("cpu")
         torch.cuda.empty_cache()
-        return final, f"✅ Successfully added {clothing_type}!"
     except Exception as e:
-        print(f"Error details: {str(e)}")
-        return None, f"Error: {str(e)}"
-# UI
-with gr.Blocks(title="Traditional Clothing AI", theme=gr.themes.Soft()) as app:
-    gr.Markdown("""
-    # 👘 Traditional Clothing AI - Face Preserved
-    Add traditional clothing while keeping your face perfectly intact.
-    """)
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(
-                type="pil",
-                label="Upload Your Photo"
-            )
-            clothing_type = gr.Dropdown(
-                choices=list(CLOTHES.keys()),
-                value="Indian Sari",
-                label="Select Traditional Clothing"
-            )
-            quality_mode = gr.Radio(
-                choices=["fast", "balanced", "ultra"],
-                value="balanced",
-                label="Quality Mode",
-                info="Higher quality = longer processing time"
-            )
-            generate_btn = gr.Button(
-                "🎨 Add Traditional Clothing",
-                variant="primary",
-                size="lg"
-            )
-        with gr.Column():
-            output_image = gr.Image(
-                label="Result"
-            )
-            status_text = gr.Textbox(
-                label="Status",
-                placeholder="Upload an image and click generate..."
-            )
-    gr.Markdown("""
-    ### How it works:
-    - 🎯 Only modifies clothing area (below face)
-    - 😊 Your face remains untouched
-    - 🎨 Smooth blending for natural results
-    - ⚡ Fast mode: ~30 seconds
-    - 🔬 Ultra mode: ~2 minutes (best quality)
-    """)
-    generate_btn.click(
-        fn=generate_clothing,
-        inputs=[input_image, clothing_type, quality_mode],
-        outputs=[output_image, status_text]
-    )
-if __name__ == "__main__":
-    app.launch()

 import gradio as gr
 import torch
+from diffusers import StableDiffusionInpaintPipeline, StableDiffusionImg2ImgPipeline
 from PIL import Image, ImageDraw, ImageFilter
 import numpy as np
 import spaces
+# BETTER MODEL OPTIONS (all free on HuggingFace):
+# Option 1: Use Realistic Vision (much better for people)
+model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
+# Option 2: Use DreamShaper (excellent for clothing)
+# model_id = "Lykon/DreamShaper"
+# Option 3: Use Deliberate (great quality)
+# model_id = "XpucT/Deliberate"
+# Option 4: Use specialized fashion model
+# model_id = "digiplay/majicMIX_realistic_v7"
 pipe = StableDiffusionInpaintPipeline.from_pretrained(
+    model_id,
     torch_dtype=torch.float16,
     safety_checker=None,
     requires_safety_checker=False
 )
 pipe.enable_attention_slicing()
+# BETTER PROMPTING for clothing
+CLOTHING_PROMPTS = {
+    "Indian Sari": (
+        "beautiful indian woman wearing traditional red silk sari with gold embroidery, "
+        "natural pose, professional fashion photography, detailed fabric texture, "
+        "studio lighting, high quality, sharp focus, elegant draping"
+    ),
+    "Japanese Kimono": (
+        "person wearing authentic japanese kimono, proper obi belt, traditional patterns, "
+        "natural standing pose, professional portrait, detailed silk texture, "
+        "proper kimono layering, accurate proportions"
+    ),
+    # Add more detailed prompts...
 }
+# NEGATIVE PROMPTS are crucial for quality
+NEGATIVE_PROMPT = (
+    "bad anatomy, bad hands, three hands, three legs, bad arms, missing arms, "
+    "missing fingers, extra fingers, ugly fingers, long fingers, horn, extra eyes, "
+    "huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, "
+    "animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, "
+    "octane render, mutated body parts, malformed limbs, fused fingers, too many fingers"
+)
+def create_precise_mask(image, body_part="torso"):
+    """Build a blurred grayscale inpainting mask for a region of the image.
+
+    The mask has the same size as ``image``, starts fully black (0), and has
+    an ellipse filled with 255 drawn over the selected region before a
+    Gaussian blur (radius 20) is applied.
+
+    Args:
+        image: Object exposing ``.size`` as ``(width, height)``.
+        body_part: ``"torso"`` or ``"full_body"``. Any other value draws
+            nothing, so the returned mask stays entirely black.
+
+    Returns:
+        The blurred mode ``'L'`` mask image.
+    """
+    width, height = image.size
     mask = Image.new('L', (width, height), 0)
     draw = ImageDraw.Draw(mask)
+    if body_part == "torso":
+        # Only torso - preserves arms, legs, head
+        # Ellipse spans 25%-75% of the width and 35%-70% of the height.
+        left = width * 0.25
+        right = width * 0.75
+        top = height * 0.35
+        bottom = height * 0.7
+        draw.ellipse([left, top, right, bottom], fill=255)
+    elif body_part == "full_body":
+        # Exclude only head and hands
+        # Ellipse spans 15%-85% of the width and 30%-95% of the height.
+        left = width * 0.15
+        right = width * 0.85
+        top = height * 0.3
+        bottom = height * 0.95
+        # Main body
+        draw.ellipse([left, top, right, bottom], fill=255)
+        # Exclude hand areas
+        # NOTE(review): both ellipses below lie outside the [left, right]
+        # bounding box of the main ellipse, where the mask is already 0, so
+        # they appear to have no effect - confirm the intended coordinates.
+        hand_size = width * 0.1
+        draw.ellipse([left-hand_size, height*0.5, left, height*0.7], fill=0)
+        draw.ellipse([right, height*0.5, right+hand_size, height*0.7], fill=0)
+    # Soften the mask edge before it is handed to the caller.
+    mask = mask.filter(ImageFilter.GaussianBlur(radius=20))
     return mask
+@spaces.GPU(duration=120)
+def generate_better_quality(input_image, clothing_type, num_generations=3):
+    """Inpaint the selected clothing onto the torso region of a photo.
+
+    Runs the inpainting pipeline ``num_generations`` times with a different
+    seed each time. No selection logic exists yet, so the first result is
+    the one returned.
+
+    Args:
+        input_image: PIL image or NumPy array; ``None`` returns an error
+            tuple immediately.
+        clothing_type: Key into ``CLOTHING_PROMPTS``.
+        num_generations: Number of seeded pipeline runs.
+
+    Returns:
+        ``(image, status_message)``. ``image`` is ``None`` when no input was
+        given or when an exception was caught.
+    """
     if input_image is None:
         return None, "Please upload an image"
     try:
         pipe.to("cuda")
+        # Prepare image
         if isinstance(input_image, np.ndarray):
             image = Image.fromarray(input_image).convert("RGB")
         else:
+            # Convert the caller's image; `image` is not bound yet on this
+            # branch, so reading it here raised UnboundLocalError.
+            image = input_image.convert("RGB")
+        # Better resolution handling
         original_size = image.size
+        # Process at optimal size (not too big, not too small)
+        optimal_size = 768
+        if max(image.size) != optimal_size:
+            scale = optimal_size / max(image.size)
+            new_w = int(image.width * scale)
+            new_h = int(image.height * scale)
+            new_w = new_w - (new_w % 8)
+            new_h = new_h - (new_h % 8)
+            image = image.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        # Create better mask
+        mask = create_precise_mask(image, "torso")
+        # Generate multiple times for better results
+        results = []
+        for i in range(num_generations):
+            with torch.autocast("cuda"):
+                result = pipe(
+                    prompt=CLOTHING_PROMPTS[clothing_type],
+                    negative_prompt=NEGATIVE_PROMPT,
+                    image=image,
+                    mask_image=mask,
+                    num_inference_steps=50,
+                    guidance_scale=7.5,
+                    strength=0.8,
+                    # Add variation with different seeds
+                    generator=torch.Generator("cuda").manual_seed(i * 1000)
+                ).images[0]
+            results.append(result)
+        # You could implement selection logic here
+        # For now, return the first result
+        final = results[0]
+        # Resize back
         if final.size != original_size:
             final = final.resize(original_size, Image.Resampling.LANCZOS)
         pipe.to("cpu")
         torch.cuda.empty_cache()
+        return final, "✅ Generated with better model!"
     except Exception as e:
+        return None, f"Error: {str(e)}"