beertoshi committed on
Commit
5435d62
·
verified ·
1 Parent(s): 3af7399

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -160
app.py CHANGED
@@ -1,203 +1,149 @@
1
  import gradio as gr
2
  import torch
3
- from diffusers import StableDiffusionInpaintPipeline
4
  from PIL import Image, ImageDraw, ImageFilter
5
  import numpy as np
6
  import spaces
7
 
8
# Stable Diffusion 2 inpainting pipeline, created once at module import in
# half precision with the safety checker disabled.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-inpainting",
    torch_dtype=torch.float16,
    safety_checker=None,
    requires_safety_checker=False,
)
pipe.enable_attention_slicing()

# Prompt text per garment, keyed by the label shown in the clothing dropdown.
CLOTHES = {
    "Indian Sari": (
        "woman wearing beautiful red and gold sari, "
        "traditional Indian dress, high quality photo"
    ),
    "Japanese Kimono": (
        "person wearing elegant kimono, "
        "traditional Japanese clothing, professional photo"
    ),
    "African Dashiki": (
        "person wearing colorful dashiki, "
        "traditional African clothing, detailed"
    ),
    "Chinese Qipao": (
        "woman wearing elegant qipao dress, "
        "traditional Chinese clothing"
    ),
    "Scottish Kilt": "man wearing Scottish kilt, traditional highland dress",
    "Middle Eastern Thobe": (
        "person wearing white thobe, "
        "traditional Middle Eastern clothing"
    ),
}
25
 
26
def make_divisible_by_8(width, height):
    """Round both dimensions down to the nearest multiple of 8.

    Args:
        width: Width in pixels.
        height: Height in pixels.

    Returns:
        A ``(width, height)`` tuple in which each value is a multiple of 8
        and never smaller than 8.
    """
    # Rounding a side shorter than 8 px down would give 0, which is not a
    # usable image dimension, so each side is floored at 8.
    return max(8, width - width % 8), max(8, height - height % 8)
 
 
 
 
 
29
 
30
def create_body_mask(image_size):
    """Build a soft-edged elliptical mask over the lower part of the frame.

    Args:
        image_size: ``(width, height)`` of the mask to create.

    Returns:
        An ``'L'`` mode image of that size: a white ellipse on a black
        background, feathered with a Gaussian blur of radius 25.
    """
    w, h = image_size
    body_mask = Image.new('L', (w, h), 0)

    # The ellipse starts 35% of the way down so the face area stays black,
    # keeps a 10% margin on each side and ends just above the bottom edge.
    bounding_box = [w * 0.1, h * 0.35, w * 0.9, h * 0.98]
    ImageDraw.Draw(body_mask).ellipse(bounding_box, fill=255)

    return body_mask.filter(ImageFilter.GaussianBlur(radius=25))
46
 
47
@spaces.GPU(duration=90)
def generate_clothing(input_image, clothing_type, quality_mode="balanced"):
    """Inpaint the selected traditional garment onto the lower part of a photo.

    The photo is scaled down to the working resolution of the chosen quality
    mode, the region given by ``create_body_mask`` is repainted from the
    ``CLOTHES`` prompt, and the result is composited over the input through a
    further-blurred copy of the mask before being scaled back to the
    original size.

    Args:
        input_image: Uploaded photo, either a PIL image or a NumPy array.
        clothing_type: Key of ``CLOTHES`` selecting the garment prompt.
        quality_mode: ``"fast"``, ``"balanced"`` or ``"ultra"``; selects the
            longest working side (512/768/1024) and step count (25/40/60).

    Returns:
        An ``(image, status)`` tuple. ``image`` is ``None`` when the request
        is rejected or generation fails, and ``status`` then holds the reason.
    """
    if input_image is None:
        return None, "Please upload an image"

    quality_settings = {
        "fast": {"size": 512, "steps": 25},
        "balanced": {"size": 768, "steps": 40},
        "ultra": {"size": 1024, "steps": 60},
    }

    # Reject bad selections before the model is moved to the GPU, with a
    # message that names the problem instead of a bare KeyError text.
    if clothing_type not in CLOTHES:
        return None, f"Error: unknown clothing type {clothing_type!r}"
    if quality_mode not in quality_settings:
        return None, f"Error: unknown quality mode {quality_mode!r}"
    settings = quality_settings[quality_mode]
    target_size = settings["size"]

    try:
        pipe.to("cuda")

        if isinstance(input_image, np.ndarray):
            image = Image.fromarray(input_image).convert("RGB")
        else:
            image = input_image.convert("RGB")

        original_size = image.size

        # Shrink (never enlarge) so the longest side fits the target.
        if max(image.size) > target_size:
            scale = target_size / max(image.size)
            new_width = int(image.width * scale)
            new_height = int(image.height * scale)
        else:
            new_width, new_height = image.size

        new_width, new_height = make_divisible_by_8(new_width, new_height)
        new_width = max(new_width, 64)
        new_height = max(new_height, 64)

        # Image and mask must share one size for the pipeline and the blend.
        working_size = (new_width, new_height)
        image_resized = image.resize(working_size, Image.Resampling.LANCZOS)
        mask = create_body_mask(working_size)

        print(f"Processing at size: {working_size}")

        prompt = CLOTHES[clothing_type] + ", professional photography, preserve facial features"
        negative_prompt = "blurry, low quality, distorted face, bad anatomy"

        with torch.autocast("cuda"):
            result = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=image_resized,
                mask_image=mask,
                # Pass the working size explicitly so the pipeline does not
                # fall back to its own default resolution.
                height=new_height,
                width=new_width,
                num_inference_steps=settings["steps"],
                guidance_scale=7.5,
                strength=0.85,
            ).images[0]

        # Guard against the pipeline returning a different size; after this
        # the image, result and blend mask are all at working_size.
        if result.size != working_size:
            result = result.resize(working_size, Image.Resampling.LANCZOS)

        # Composite through a softer copy of the mask so the untouched
        # upper region (face) comes straight from the input photo.
        blend_mask = mask.filter(ImageFilter.GaussianBlur(radius=40))
        final = Image.composite(result, image_resized, blend_mask)

        if final.size != original_size:
            final = final.resize(original_size, Image.Resampling.LANCZOS)

        return final, f"✅ Successfully added {clothing_type}!"

    except Exception as e:
        print(f"Error details: {str(e)}")
        return None, f"Error: {str(e)}"

    finally:
        # Release the GPU on failure as well as on success.
        pipe.to("cpu")
        torch.cuda.empty_cache()
142
-
143
# UI
with gr.Blocks(title="Traditional Clothing AI", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
    # 👘 Traditional Clothing AI - Face Preserved

    Add traditional clothing while keeping your face perfectly intact.
    """)

    with gr.Row():
        # Left column: photo upload, options and the trigger button.
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Your Photo")

            clothing_type = gr.Dropdown(
                choices=list(CLOTHES),
                value="Indian Sari",
                label="Select Traditional Clothing",
            )

            quality_mode = gr.Radio(
                choices=["fast", "balanced", "ultra"],
                value="balanced",
                label="Quality Mode",
                info="Higher quality = longer processing time",
            )

            generate_btn = gr.Button(
                "🎨 Add Traditional Clothing", variant="primary", size="lg"
            )

        # Right column: generated image and a status line.
        with gr.Column():
            output_image = gr.Image(label="Result")

            status_text = gr.Textbox(
                label="Status",
                placeholder="Upload an image and click generate...",
            )

    gr.Markdown("""
    ### How it works:
    - 🎯 Only modifies clothing area (below face)
    - 😊 Your face remains untouched
    - 🎨 Smooth blending for natural results
    - ⚡ Fast mode: ~30 seconds
    - 🔬 Ultra mode: ~2 minutes (best quality)
    """)

    # Clicking the button runs generation and fills both outputs.
    generate_btn.click(
        fn=generate_clothing,
        inputs=[input_image, clothing_type, quality_mode],
        outputs=[output_image, status_text],
    )

if __name__ == "__main__":
    app.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from diffusers import StableDiffusionInpaintPipeline, StableDiffusionImg2ImgPipeline
4
  from PIL import Image, ImageDraw, ImageFilter
5
  import numpy as np
6
  import spaces
7
 
8
# Candidate checkpoints (all free on HuggingFace). Option 1 is the active
# choice; the others are kept commented out as alternatives.

# Option 1: Realistic Vision (much better for people)
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"

# Option 2: DreamShaper (excellent for clothing)
# model_id = "Lykon/DreamShaper"

# Option 3: Deliberate (great quality)
# model_id = "XpucT/Deliberate"

# Option 4: specialized fashion model
# model_id = "digiplay/majicMIX_realistic_v7"

# Inpainting pipeline for the selected checkpoint, created once at module
# import in half precision with the safety checker disabled.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    safety_checker=None,
    requires_safety_checker=False,
)
pipe.enable_attention_slicing()

# Detailed positive prompt per garment, keyed by the clothing type name.
CLOTHING_PROMPTS = {
    "Indian Sari": (
        "beautiful indian woman wearing traditional red silk sari "
        "with gold embroidery, natural pose, "
        "professional fashion photography, detailed fabric texture, "
        "studio lighting, high quality, sharp focus, elegant draping"
    ),
    "Japanese Kimono": (
        "person wearing authentic japanese kimono, proper obi belt, "
        "traditional patterns, natural standing pose, "
        "professional portrait, detailed silk texture, "
        "proper kimono layering, accurate proportions"
    ),
    # Add more detailed prompts...
}

# Shared negative prompt: anatomy failures and non-photographic looks to
# steer the model away from.
NEGATIVE_PROMPT = ", ".join((
    "bad anatomy", "bad hands", "three hands", "three legs", "bad arms",
    "missing arms", "missing fingers", "extra fingers", "ugly fingers",
    "long fingers", "horn", "extra eyes", "huge eyes", "2girl",
    "amputation", "disconnected limbs", "cartoon", "cg", "3d", "unreal",
    "animate", "cgi", "render", "artwork", "illustration", "3d render",
    "cinema 4d", "artstation", "octane render", "mutated body parts",
    "malformed limbs", "fused fingers", "too many fingers",
))
53
 
54
def create_precise_mask(image, body_part="torso"):
    """Build a blurred inpainting mask for one region of a photo.

    The regions are fixed fractions of the frame, not detected from the
    image content.

    Args:
        image: PIL image; only its ``size`` is read.
        body_part: ``"torso"`` for an ellipse over the middle of the frame,
            or ``"full_body"`` for a larger ellipse with the hand areas at
            its left and right edges cut back out.

    Returns:
        An ``'L'`` mode image the same size as ``image``, white where the
        picture should be repainted, feathered with a Gaussian blur of
        radius 20.

    Raises:
        ValueError: If ``body_part`` is not one of the supported names.
            Previously this silently produced an all-black mask.
    """
    if body_part not in ("torso", "full_body"):
        raise ValueError(
            f"Unsupported body_part {body_part!r}; expected 'torso' or 'full_body'"
        )

    width, height = image.size
    mask = Image.new('L', (width, height), 0)
    draw = ImageDraw.Draw(mask)

    if body_part == "torso":
        # Only torso - preserves arms, legs, head
        draw.ellipse(
            [width * 0.25, height * 0.35, width * 0.75, height * 0.7],
            fill=255,
        )
    else:
        # Exclude only head and hands
        left = width * 0.15
        right = width * 0.85
        draw.ellipse([left, height * 0.3, right, height * 0.95], fill=255)

        # Cut the hand areas back out. The cut-outs are centred on the left
        # and right edges of the body ellipse's bounding box so that they
        # overlap it; placed wholly outside that box they would only clear
        # pixels that are already black.
        half_hand = width * 0.1 / 2
        for edge in (left, right):
            draw.ellipse(
                [edge - half_hand, height * 0.5, edge + half_hand, height * 0.7],
                fill=0,
            )

    return mask.filter(ImageFilter.GaussianBlur(radius=20))
85
 
86
@spaces.GPU(duration=120)
def generate_better_quality(input_image, clothing_type, num_generations=3):
    """Inpaint the selected garment onto the torso region of a photo.

    The photo is rescaled so its longest side is 768 px, the torso mask from
    ``create_precise_mask`` is repainted ``num_generations`` times with
    different fixed seeds, and the first candidate is scaled back to the
    original size and returned. The remaining candidates are generated but
    currently discarded; no selection logic exists yet.

    Args:
        input_image: Uploaded photo, either a PIL image or a NumPy array.
        clothing_type: Key of ``CLOTHING_PROMPTS`` selecting the prompt.
        num_generations: Number of candidates to generate (at least 1).

    Returns:
        An ``(image, status)`` tuple. ``image`` is ``None`` when the request
        is rejected or generation fails, and ``status`` then holds the reason.
    """
    if input_image is None:
        return None, "Please upload an image"

    # Reject bad requests before the model is moved to the GPU.
    if clothing_type not in CLOTHING_PROMPTS:
        return None, f"Error: unknown clothing type {clothing_type!r}"
    if num_generations < 1:
        return None, "Error: num_generations must be at least 1"

    try:
        pipe.to("cuda")

        # Prepare image
        if isinstance(input_image, np.ndarray):
            image = Image.fromarray(input_image).convert("RGB")
        else:
            # Convert the uploaded image itself; `image` is not bound yet.
            image = input_image.convert("RGB")

        original_size = image.size

        # Process at optimal size (not too big, not too small). Both sides
        # are always rounded down to a multiple of 8 and floored at 8, even
        # when the longest side is already 768. round() avoids losing a
        # whole 8 px step to a product such as 767.9999.
        optimal_size = 768
        scale = optimal_size / max(image.size)
        new_w = max(8, round(image.width * scale) // 8 * 8)
        new_h = max(8, round(image.height * scale) // 8 * 8)
        if (new_w, new_h) != image.size:
            image = image.resize((new_w, new_h), Image.Resampling.LANCZOS)

        mask = create_precise_mask(image, "torso")

        # Generate multiple candidates, one fixed seed each.
        results = []
        for i in range(num_generations):
            generator = torch.Generator("cuda").manual_seed(i * 1000)
            with torch.autocast("cuda"):
                result = pipe(
                    prompt=CLOTHING_PROMPTS[clothing_type],
                    negative_prompt=NEGATIVE_PROMPT,
                    image=image,
                    mask_image=mask,
                    # Pass the working size explicitly so the pipeline does
                    # not fall back to its own default resolution.
                    height=new_h,
                    width=new_w,
                    num_inference_steps=50,
                    guidance_scale=7.5,
                    strength=0.8,
                    generator=generator,
                ).images[0]
            results.append(result)

        # You could implement selection logic here
        # For now, return the first result
        final = results[0]

        # Resize back
        if final.size != original_size:
            final = final.resize(original_size, Image.Resampling.LANCZOS)

        return final, "✅ Generated with better model!"

    except Exception as e:
        return None, f"Error: {str(e)}"

    finally:
        # Release the GPU on failure as well as on success.
        pipe.to("cpu")
        torch.cuda.empty_cache()