Reverse committed on
Commit
f66da45
·
verified ·
1 Parent(s): 9b618ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +305 -105
app.py CHANGED
@@ -5,11 +5,22 @@ import requests
5
  import io
6
  import os
7
  import spaces
 
 
8
 
9
- # Initialize object detection using Hugging Face Inference API
10
- class ObjectDetector:
11
  def __init__(self):
12
- self.api_url = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"
 
 
 
 
 
 
 
 
 
13
 
14
  def detect(self, image, hf_token=None):
15
  import base64
@@ -26,63 +37,218 @@ class ObjectDetector:
26
 
27
  # Convert PIL image to base64 string
28
  img_buffer = io.BytesIO()
29
- image.save(img_buffer, format='JPEG')
30
  img_bytes = img_buffer.getvalue()
31
  img_base64 = base64.b64encode(img_bytes).decode("utf-8")
32
 
33
- # Send JSON payload with base64 image
34
- response = requests.post(self.api_url, headers=headers, json={"inputs": img_base64})
35
-
36
- if response.status_code != 200:
37
- raise Exception(f"Object detection API error: {response.status_code} - {response.text}")
38
-
39
- return response.json()
40
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- object_detector = ObjectDetector()
43
 
44
- # COCO class names for object detection
45
- COCO_CLASSES = [
46
- 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
47
- 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
48
- 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
49
- 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
50
- 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
51
- 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
52
- 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
53
- 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
54
- 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
55
- 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
56
- 'toothbrush'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  ]
58
 
59
- def detect_objects(image, target_class, confidence_threshold, hf_token=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  """
61
- Detect objects in the image and return bounding boxes for the target class
62
  """
63
  try:
64
- # Use Hugging Face Inference API for object detection
 
 
 
65
  results = object_detector.detect(image, hf_token)
66
 
67
- # Filter results for target class
68
-
69
- target_detections = []
 
 
70
  for detection in results:
71
- label = detection.get('label')
72
- if (
73
- label is not None and
74
- label.lower() == target_class.lower() and
75
- detection['score'] > confidence_threshold
76
- ):
77
- target_detections.append(detection)
78
-
79
- return target_detections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  except Exception as e:
 
81
  raise gr.Error(f"Object detection failed: {str(e)}")
82
 
83
  def create_mask_from_detections(image, detections, mask_expansion=10):
84
  """
85
- Create a binary mask from object detections
86
  """
87
  width, height = image.size
88
  mask = Image.new('L', (width, height), 0) # Black mask
@@ -90,11 +256,17 @@ def create_mask_from_detections(image, detections, mask_expansion=10):
90
 
91
  for detection in detections:
92
  box = detection['box']
93
- # Expand the bounding box slightly for better masking
94
- x1 = max(0, box['xmin'] - mask_expansion)
95
- y1 = max(0, box['ymin'] - mask_expansion)
96
- x2 = min(width, box['xmax'] + mask_expansion)
97
- y2 = min(height, box['ymax'] + mask_expansion)
 
 
 
 
 
 
98
 
99
  # Draw white rectangle on mask (255 = area to inpaint)
100
  draw.rectangle([x1, y1, x2, y2], fill=255)
@@ -102,29 +274,38 @@ def create_mask_from_detections(image, detections, mask_expansion=10):
102
  return mask
103
 
104
  @spaces.GPU
105
- def remove_objects(image, object_class, confidence_threshold, mask_expansion, inpaint_prompt, hf_token):
106
  """
107
- Main function to remove objects from image using SDXL inpainting
108
  """
109
  try:
110
  if image is None:
111
  raise gr.Error("Please upload an image")
112
 
 
 
 
113
  # Try to get token from multiple sources
114
  token = hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
115
  if not token:
116
  raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
117
 
118
- # Step 1: Detect objects
119
- detections = detect_objects(image, object_class, confidence_threshold, token)
120
 
121
  if not detections:
122
- return image, None, f"No {object_class} objects detected with confidence > {confidence_threshold}"
 
 
 
 
 
 
123
 
124
- # Step 2: Create mask
125
  mask = create_mask_from_detections(image, detections, mask_expansion)
126
 
127
- # Step 3: Use SDXL for inpainting via Hugging Face Inference API
128
  inpaint_api_url = "https://api-inference.huggingface.co/models/diffusers/stable-diffusion-xl-1.0-inpainting-0.1"
129
 
130
  headers = {"Authorization": f"Bearer {token}"}
@@ -144,29 +325,37 @@ def remove_objects(image, object_class, confidence_threshold, mask_expansion, in
144
  'mask': ('mask.png', mask_bytes, 'image/png')
145
  }
146
 
 
 
 
147
  data = {
148
- 'prompt': inpaint_prompt,
149
- 'negative_prompt': 'blurry, low quality, distorted, artifacts',
150
- 'num_inference_steps': 20,
151
  'guidance_scale': 7.5,
152
  'strength': 0.99
153
  }
154
 
155
  try:
156
- response = requests.post(inpaint_api_url, headers=headers, files=files, data=data, timeout=60)
157
 
158
  if response.status_code == 200:
159
  result_image = Image.open(io.BytesIO(response.content))
160
- status_msg = f"✅ Successfully removed {len(detections)} {object_class} object(s)"
 
 
 
161
  else:
162
  # Fallback: return original with mask overlay for debugging
163
  result_image = create_mask_overlay(image, mask)
164
- status_msg = f"⚠️ SDXL inpainting failed (HTTP {response.status_code}). Showing detected areas in red."
 
165
 
166
  except Exception as e:
167
  # Fallback: return original with mask overlay for debugging
168
  result_image = create_mask_overlay(image, mask)
169
- status_msg = f"⚠️ SDXL inpainting failed: {str(e)}. Showing detected areas in red."
 
170
 
171
  return result_image, mask, status_msg
172
 
@@ -187,19 +376,20 @@ def create_mask_overlay(image, mask):
187
  # Create Gradio interface
188
  with gr.Blocks(
189
  fill_height=True,
190
- title="Object Removal with SDXL",
191
  theme=gr.themes.Soft()
192
  ) as demo:
193
 
194
  gr.Markdown("""
195
- # 🎯 Object Removal using AI Detection + SDXL Inpainting
196
 
197
- Upload an image, specify an object class to remove, and let AI intelligently remove it!
198
 
199
  **How it works:**
200
- 1. 🔍 **Object Detection**: Uses Facebook's DETR model to find objects
201
- 2. 🎭 **Mask Generation**: Creates precise removal masks
202
- 3. 🎨 **AI Inpainting**: Uses SDXL to intelligently fill the removed areas
 
203
  """)
204
 
205
  with gr.Row():
@@ -213,36 +403,46 @@ with gr.Blocks(
213
  height=300
214
  )
215
 
216
- object_class = gr.Dropdown(
217
- choices=COCO_CLASSES,
218
  label="🎯 Object to Remove",
 
219
  value="person",
220
- allow_custom_value=True,
221
- info="Select or type the object class to remove"
222
  )
223
 
 
 
 
 
 
 
 
 
 
 
 
224
  with gr.Accordion("⚙️ Advanced Settings", open=False):
225
  confidence_threshold = gr.Slider(
226
  minimum=0.1,
227
  maximum=1.0,
228
- value=0.5,
229
- step=0.1,
230
  label="🎚️ Detection Confidence",
231
- info="Higher = fewer but more confident detections"
232
  )
233
 
234
  mask_expansion = gr.Slider(
235
  minimum=0,
236
  maximum=50,
237
- value=15,
238
  step=5,
239
  label="📏 Mask Expansion (pixels)",
240
- info="Expand mask around detected objects"
241
  )
242
 
243
  inpaint_prompt = gr.Textbox(
244
  label="✨ Inpainting Prompt",
245
- value="natural background, seamless, high quality, photorealistic",
246
  placeholder="Describe what should replace the removed object",
247
  info="Be specific about the desired background/replacement"
248
  )
@@ -273,9 +473,9 @@ with gr.Blocks(
273
  )
274
 
275
  status_text = gr.Textbox(
276
- label="📊 Status",
277
  interactive=False,
278
- max_lines=3
279
  )
280
 
281
  # Event handlers
@@ -283,7 +483,7 @@ with gr.Blocks(
283
  fn=remove_objects,
284
  inputs=[
285
  input_image,
286
- object_class,
287
  confidence_threshold,
288
  mask_expansion,
289
  inpaint_prompt,
@@ -299,40 +499,40 @@ with gr.Blocks(
299
  ## 📚 Instructions
300
 
301
  1. **Upload an image** containing objects you want to remove
302
- 2. **Select the object class** from the dropdown (e.g., 'person', 'car', 'bottle')
303
  3. **Adjust settings** if needed:
304
- - **Confidence**: Lower = more detections, higher = fewer but more accurate
305
- - **Mask expansion**: Larger values remove more area around objects
306
- - **Inpainting prompt**: Describe the desired replacement (e.g., "grass field", "brick wall")
307
- 4. **Click "Remove Objects"** and wait for processing
308
 
309
- ### 💡 Tips for Better Results:
310
- - Use clear, high-resolution images
311
- - Be specific in inpainting prompts: "blue sky with clouds" vs "background"
312
- - For complex scenes, try different confidence thresholds
313
- - Objects partially cut off at image edges may not be detected well
314
  """)
315
 
316
  with gr.Column():
317
  gr.Markdown("""
318
- ## 🎯 Supported Objects
319
-
320
- **People & Animals**: person, cat, dog, horse, bird, cow, sheep, etc.
321
-
322
- **Vehicles**: car, bicycle, motorcycle, bus, truck, boat, airplane
323
-
324
- **Furniture**: chair, couch, bed, dining table, tv, laptop
325
 
326
- **Objects**: bottle, cup, book, phone, backpack, umbrella
327
 
328
- **And 60+ more COCO classes!**
 
 
 
 
 
 
329
 
330
- ### ⚠️ Important Notes:
331
- - **Token Required**: Either enter your HF token above OR set `HF_TOKEN` in Space secrets
332
- - **Get Token**: Visit https://huggingface.co/settings/tokens to create one
333
- - Processing may take 30-60 seconds depending on image size
334
- - Results depend on object detection accuracy and image complexity
335
- - Red overlay indicates detected areas when inpainting fails
336
  """)
337
 
338
  if __name__ == "__main__":
 
5
  import io
6
  import os
7
  import spaces
8
+ import json
9
+ import re
10
 
11
+ # Initialize object detection using the most advanced YOLO model
12
+ class AdvancedYOLODetector:
13
  def __init__(self):
14
+ # Using the most advanced YOLO model available on Hugging Face
15
+ # YOLOv8 is the latest and most advanced version
16
+ self.api_url = "https://api-inference.huggingface.co/models/ultralytics/yolov8x"
17
+ # Fallback models in order of preference:
18
+ self.fallback_models = [
19
+ "https://api-inference.huggingface.co/models/ultralytics/yolov8l",
20
+ "https://api-inference.huggingface.co/models/ultralytics/yolov8m",
21
+ "https://api-inference.huggingface.co/models/ultralytics/yolov8s",
22
+ "https://api-inference.huggingface.co/models/ultralytics/yolov8n"
23
+ ]
24
 
25
  def detect(self, image, hf_token=None):
26
  import base64
 
37
 
38
  # Convert PIL image to base64 string
39
  img_buffer = io.BytesIO()
40
+ image.save(img_buffer, format='JPEG', quality=95)
41
  img_bytes = img_buffer.getvalue()
42
  img_base64 = base64.b64encode(img_bytes).decode("utf-8")
43
 
44
+ payload = {"inputs": img_base64}
45
+
46
+ # Try main model first, then fallbacks
47
+ models_to_try = [self.api_url] + self.fallback_models
48
+
49
+ for model_url in models_to_try:
50
+ try:
51
+ response = requests.post(
52
+ model_url,
53
+ headers=headers,
54
+ json=payload,
55
+ timeout=45
56
+ )
57
+
58
+ if response.status_code == 503:
59
+ # Model is loading, wait and retry once
60
+ import time
61
+ time.sleep(15)
62
+ response = requests.post(
63
+ model_url,
64
+ headers=headers,
65
+ json=payload,
66
+ timeout=45
67
+ )
68
+
69
+ if response.status_code == 200:
70
+ result = response.json()
71
+ if isinstance(result, list) and len(result) > 0:
72
+ return result
73
+ elif isinstance(result, dict) and 'error' not in result:
74
+ return []
75
+
76
+ # If this model failed, try next one
77
+ print(f"Model {model_url} failed with status {response.status_code}, trying next...")
78
+ continue
79
+
80
+ except requests.exceptions.Timeout:
81
+ print(f"Timeout with model {model_url}, trying next...")
82
+ continue
83
+ except requests.exceptions.RequestException as e:
84
+ print(f"Network error with model {model_url}: {str(e)}, trying next...")
85
+ continue
86
+
87
+ # If all models failed
88
+ raise Exception("All YOLO models failed or are unavailable. Please try again later.")
89
 
90
+ object_detector = AdvancedYOLODetector()
91
 
92
+ # Extended object class names including common variations and synonyms
93
+ COMMON_OBJECTS = [
94
+ # People and body parts
95
+ 'person', 'people', 'human', 'man', 'woman', 'child', 'baby', 'face', 'head',
96
+
97
+ # Animals
98
+ 'cat', 'dog', 'bird', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
99
+ 'lion', 'tiger', 'monkey', 'rabbit', 'mouse', 'rat', 'pig', 'goat', 'deer', 'fox',
100
+
101
+ # Vehicles
102
+ 'car', 'truck', 'bus', 'motorcycle', 'bicycle', 'bike', 'airplane', 'plane', 'boat',
103
+ 'ship', 'train', 'van', 'taxi', 'ambulance', 'fire truck', 'police car',
104
+
105
+ # Furniture and household items
106
+ 'chair', 'table', 'couch', 'sofa', 'bed', 'desk', 'shelf', 'cabinet', 'drawer',
107
+ 'tv', 'television', 'laptop', 'computer', 'monitor', 'phone', 'mobile', 'tablet',
108
+
109
+ # Food and drinks
110
+ 'bottle', 'cup', 'glass', 'bowl', 'plate', 'fork', 'knife', 'spoon', 'banana', 'apple',
111
+ 'orange', 'pizza', 'sandwich', 'cake', 'donut', 'hot dog', 'hamburger', 'coffee',
112
+
113
+ # Sports and recreation
114
+ 'ball', 'football', 'basketball', 'tennis ball', 'baseball', 'soccer ball',
115
+ 'skateboard', 'surfboard', 'skis', 'bicycle', 'kite', 'frisbee',
116
+
117
+ # Clothing and accessories
118
+ 'hat', 'cap', 'glasses', 'sunglasses', 'bag', 'backpack', 'handbag', 'purse',
119
+ 'umbrella', 'tie', 'shoe', 'boot', 'shirt', 'jacket', 'coat',
120
+
121
+ # Tools and objects
122
+ 'scissors', 'hammer', 'screwdriver', 'knife', 'pen', 'pencil', 'book', 'paper',
123
+ 'clock', 'watch', 'key', 'remote', 'controller', 'camera', 'microphone',
124
+
125
+ # Nature and outdoor
126
+ 'tree', 'flower', 'plant', 'grass', 'rock', 'stone', 'mountain', 'cloud', 'sun',
127
+ 'bench', 'sign', 'pole', 'fence', 'gate', 'building', 'house', 'window', 'door'
128
  ]
129
 
130
+ def fuzzy_match_object(user_input, detected_labels):
131
+ """
132
+ Advanced matching function that handles synonyms, plurals, and fuzzy matching
133
+ """
134
+ user_input = user_input.lower().strip()
135
+ matches = []
136
+
137
+ # Direct matching
138
+ for detection in detected_labels:
139
+ label = detection.get('label', '').lower()
140
+
141
+ # Exact match
142
+ if label == user_input:
143
+ matches.append(detection)
144
+ continue
145
+
146
+ # Handle plurals
147
+ if user_input.endswith('s') and label == user_input[:-1]:
148
+ matches.append(detection)
149
+ continue
150
+ if label.endswith('s') and user_input == label[:-1]:
151
+ matches.append(detection)
152
+ continue
153
+
154
+ # Substring matching
155
+ if user_input in label or label in user_input:
156
+ matches.append(detection)
157
+ continue
158
+
159
+ # Handle common synonyms
160
+ synonyms = {
161
+ 'person': ['human', 'people', 'man', 'woman', 'individual'],
162
+ 'car': ['vehicle', 'automobile', 'auto'],
163
+ 'bike': ['bicycle', 'cycle'],
164
+ 'phone': ['mobile', 'cellphone', 'smartphone'],
165
+ 'tv': ['television', 'telly'],
166
+ 'couch': ['sofa', 'settee'],
167
+ 'bag': ['purse', 'handbag', 'backpack'],
168
+ 'glasses': ['spectacles', 'eyeglasses'],
169
+ 'plane': ['airplane', 'aircraft'],
170
+ 'boat': ['ship', 'vessel'],
171
+ 'dog': ['puppy', 'canine'],
172
+ 'cat': ['kitten', 'feline']
173
+ }
174
+
175
+ # Check if user input matches any synonym
176
+ for main_word, synonym_list in synonyms.items():
177
+ if (user_input == main_word and label in synonym_list) or \
178
+ (user_input in synonym_list and label == main_word):
179
+ matches.append(detection)
180
+ break
181
+
182
+ return matches
183
+
184
+ def detect_objects(image, target_object, confidence_threshold, hf_token=None):
185
  """
186
+ Detect any object in the image using advanced YOLO and return bounding boxes
187
  """
188
  try:
189
+ if not target_object or not target_object.strip():
190
+ raise gr.Error("Please enter an object name to detect and remove")
191
+
192
+ # Use advanced YOLO for object detection
193
  results = object_detector.detect(image, hf_token)
194
 
195
+ if not results or not isinstance(results, list):
196
+ return []
197
+
198
+ # Apply confidence threshold first
199
+ filtered_detections = []
200
  for detection in results:
201
+ if isinstance(detection, dict) and detection.get('score', 0) >= confidence_threshold:
202
+ filtered_detections.append(detection)
203
+
204
+ # Use fuzzy matching to find target objects
205
+ target_detections = fuzzy_match_object(target_object, filtered_detections)
206
+
207
+ # Process and validate bounding boxes
208
+ valid_detections = []
209
+ image_width, image_height = image.size
210
+
211
+ for detection in target_detections:
212
+ box = detection.get('box', {})
213
+
214
+ if box and all(key in box for key in ['xmin', 'ymin', 'xmax', 'ymax']):
215
+ # Convert coordinates
216
+ xmin = box['xmin']
217
+ ymin = box['ymin']
218
+ xmax = box['xmax']
219
+ ymax = box['ymax']
220
+
221
+ # Handle normalized coordinates (0-1 range)
222
+ if xmax <= 1.0 and ymax <= 1.0:
223
+ xmin = int(xmin * image_width)
224
+ ymin = int(ymin * image_height)
225
+ xmax = int(xmax * image_width)
226
+ ymax = int(ymax * image_height)
227
+
228
+ # Ensure coordinates are within bounds and valid
229
+ xmin = max(0, min(int(xmin), image_width))
230
+ ymin = max(0, min(int(ymin), image_height))
231
+ xmax = max(xmin, min(int(xmax), image_width))
232
+ ymax = max(ymin, min(int(ymax), image_height))
233
+
234
+ # Only add if box has valid area
235
+ if xmax > xmin and ymax > ymin:
236
+ detection_copy = detection.copy()
237
+ detection_copy['box'] = {
238
+ 'xmin': xmin, 'ymin': ymin,
239
+ 'xmax': xmax, 'ymax': ymax
240
+ }
241
+ valid_detections.append(detection_copy)
242
+
243
+ return valid_detections
244
+
245
  except Exception as e:
246
+ print(f"Detection error: {str(e)}")
247
  raise gr.Error(f"Object detection failed: {str(e)}")
248
 
249
  def create_mask_from_detections(image, detections, mask_expansion=10):
250
  """
251
+ Create a binary mask from object detections with smart expansion
252
  """
253
  width, height = image.size
254
  mask = Image.new('L', (width, height), 0) # Black mask
 
256
 
257
  for detection in detections:
258
  box = detection['box']
259
+
260
+ # Calculate expansion based on object size
261
+ box_width = box['xmax'] - box['xmin']
262
+ box_height = box['ymax'] - box['ymin']
263
+ adaptive_expansion = min(mask_expansion, max(5, int(min(box_width, box_height) * 0.1)))
264
+
265
+ # Expand the bounding box
266
+ x1 = max(0, box['xmin'] - adaptive_expansion)
267
+ y1 = max(0, box['ymin'] - adaptive_expansion)
268
+ x2 = min(width, box['xmax'] + adaptive_expansion)
269
+ y2 = min(height, box['ymax'] + adaptive_expansion)
270
 
271
  # Draw white rectangle on mask (255 = area to inpaint)
272
  draw.rectangle([x1, y1, x2, y2], fill=255)
 
274
  return mask
275
 
276
  @spaces.GPU
277
+ def remove_objects(image, object_name, confidence_threshold, mask_expansion, inpaint_prompt, hf_token):
278
  """
279
+ Main function to remove any specified object from image using advanced YOLO + SDXL
280
  """
281
  try:
282
  if image is None:
283
  raise gr.Error("Please upload an image")
284
 
285
+ if not object_name or not object_name.strip():
286
+ raise gr.Error("Please enter the name of the object you want to remove")
287
+
288
  # Try to get token from multiple sources
289
  token = hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
290
  if not token:
291
  raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
292
 
293
+ # Step 1: Detect objects using advanced YOLO
294
+ detections = detect_objects(image, object_name, confidence_threshold, token)
295
 
296
  if not detections:
297
+ # Provide helpful suggestions
298
+ suggestion_msg = f"No '{object_name}' objects detected with confidence > {confidence_threshold}.\n\n"
299
+ suggestion_msg += "💡 Try:\n"
300
+ suggestion_msg += "• Lowering the confidence threshold\n"
301
+ suggestion_msg += "• Using different object names (e.g., 'person' instead of 'human')\n"
302
+ suggestion_msg += "• Checking if the object is clearly visible in the image"
303
+ return image, None, suggestion_msg
304
 
305
+ # Step 2: Create mask with adaptive expansion
306
  mask = create_mask_from_detections(image, detections, mask_expansion)
307
 
308
+ # Step 3: Use SDXL for inpainting
309
  inpaint_api_url = "https://api-inference.huggingface.co/models/diffusers/stable-diffusion-xl-1.0-inpainting-0.1"
310
 
311
  headers = {"Authorization": f"Bearer {token}"}
 
325
  'mask': ('mask.png', mask_bytes, 'image/png')
326
  }
327
 
328
+ # Enhanced inpainting prompt
329
+ enhanced_prompt = f"{inpaint_prompt}, photorealistic, high quality, detailed, natural lighting"
330
+
331
  data = {
332
+ 'prompt': enhanced_prompt,
333
+ 'negative_prompt': 'blurry, low quality, distorted, artifacts, unrealistic, pixelated, noise',
334
+ 'num_inference_steps': 25,
335
  'guidance_scale': 7.5,
336
  'strength': 0.99
337
  }
338
 
339
  try:
340
+ response = requests.post(inpaint_api_url, headers=headers, files=files, data=data, timeout=90)
341
 
342
  if response.status_code == 200:
343
  result_image = Image.open(io.BytesIO(response.content))
344
+ detected_labels = [d.get('label', 'unknown') for d in detections]
345
+ status_msg = f"✅ Successfully removed {len(detections)} '{object_name}' object(s)\n"
346
+ status_msg += f"🎯 Detected as: {', '.join(detected_labels)}\n"
347
+ status_msg += f"🔧 Used: Advanced YOLO + SDXL Inpainting"
348
  else:
349
  # Fallback: return original with mask overlay for debugging
350
  result_image = create_mask_overlay(image, mask)
351
+ status_msg = f"⚠️ SDXL inpainting failed (HTTP {response.status_code}). Showing detected areas in red.\n"
352
+ status_msg += f"🎯 Found {len(detections)} '{object_name}' object(s) - detection was successful"
353
 
354
  except Exception as e:
355
  # Fallback: return original with mask overlay for debugging
356
  result_image = create_mask_overlay(image, mask)
357
+ status_msg = f"⚠️ SDXL inpainting failed: {str(e)}. Showing detected areas in red.\n"
358
+ status_msg += f"🎯 Found {len(detections)} '{object_name}' object(s) - detection was successful"
359
 
360
  return result_image, mask, status_msg
361
 
 
376
  # Create Gradio interface
377
  with gr.Blocks(
378
  fill_height=True,
379
+ title="Advanced Object Removal with YOLOv8",
380
  theme=gr.themes.Soft()
381
  ) as demo:
382
 
383
  gr.Markdown("""
384
+ # 🚀 Advanced Object Removal using YOLOv8 + SDXL Inpainting
385
 
386
+ Upload an image and specify **ANY object** you want to remove - no limitations!
387
 
388
  **How it works:**
389
+ 1. 🔍 **YOLOv8 Detection**: Uses the most advanced YOLO model for object detection
390
+ 2. 🧠 **Smart Matching**: Handles synonyms, plurals, and fuzzy object name matching
391
+ 3. 🎭 **Adaptive Masking**: Creates intelligent removal masks
392
+ 4. 🎨 **SDXL Inpainting**: Uses state-of-the-art AI to fill removed areas seamlessly
393
  """)
394
 
395
  with gr.Row():
 
403
  height=300
404
  )
405
 
406
+ object_name = gr.Textbox(
 
407
  label="🎯 Object to Remove",
408
+ placeholder="Enter any object name (e.g., person, car, dog, bottle, tree, sign...)",
409
  value="person",
410
+ info="Type ANY object name - supports synonyms and variations!"
 
411
  )
412
 
413
+ # Add suggestions
414
+ with gr.Row():
415
+ gr.Examples(
416
+ examples=[
417
+ ["person"], ["car"], ["dog"], ["cat"], ["bottle"],
418
+ ["chair"], ["tree"], ["sign"], ["bag"], ["phone"]
419
+ ],
420
+ inputs=[object_name],
421
+ label="💡 Quick Examples"
422
+ )
423
+
424
  with gr.Accordion("⚙️ Advanced Settings", open=False):
425
  confidence_threshold = gr.Slider(
426
  minimum=0.1,
427
  maximum=1.0,
428
+ value=0.3,
429
+ step=0.05,
430
  label="🎚️ Detection Confidence",
431
+ info="Lower = more detections, higher = fewer but more confident"
432
  )
433
 
434
  mask_expansion = gr.Slider(
435
  minimum=0,
436
  maximum=50,
437
+ value=20,
438
  step=5,
439
  label="📏 Mask Expansion (pixels)",
440
+ info="Expand mask around detected objects for better removal"
441
  )
442
 
443
  inpaint_prompt = gr.Textbox(
444
  label="✨ Inpainting Prompt",
445
+ value="natural background, seamless, realistic environment",
446
  placeholder="Describe what should replace the removed object",
447
  info="Be specific about the desired background/replacement"
448
  )
 
473
  )
474
 
475
  status_text = gr.Textbox(
476
+ label="📊 Status & Detection Info",
477
  interactive=False,
478
+ max_lines=4
479
  )
480
 
481
  # Event handlers
 
483
  fn=remove_objects,
484
  inputs=[
485
  input_image,
486
+ object_name,
487
  confidence_threshold,
488
  mask_expansion,
489
  inpaint_prompt,
 
499
  ## 📚 Instructions
500
 
501
  1. **Upload an image** containing objects you want to remove
502
+ 2. **Enter ANY object name** in the text box - no restrictions!
503
  3. **Adjust settings** if needed:
504
+ - **Confidence**: Start with 0.3, increase if too many false detections
505
+ - **Mask expansion**: Larger values ensure complete object removal
506
+ - **Inpainting prompt**: Describe the desired replacement scene
507
+ 4. **Click "Remove Objects"** and wait for AI processing
508
 
509
+ ### 💡 Smart Object Recognition:
510
+ - **Handles variations**: "car" = "vehicle" = "automobile"
511
+ - **Plural support**: "person" matches "people"
512
+ - **Common synonyms**: "phone" = "mobile" = "smartphone"
513
+ - **Fuzzy matching**: Partial name matches work too!
514
  """)
515
 
516
  with gr.Column():
517
  gr.Markdown("""
518
+ ## 🎯 What Can Be Removed?
 
 
 
 
 
 
519
 
520
+ **✅ ANY Object You Can Think Of!**
521
 
522
+ **Popular Examples:**
523
+ - **People**: person, human, man, woman, child, face
524
+ - **Animals**: dog, cat, bird, horse, any animal name
525
+ - **Vehicles**: car, truck, bike, plane, boat, motorcycle
526
+ - **Objects**: bottle, bag, phone, chair, table, sign
527
+ - **Nature**: tree, flower, rock, cloud, mountain
528
+ - **And literally thousands more!**
529
 
530
+ ### ⚠️ System Info:
531
+ - **🚀 Powered by**: YOLOv8x (most advanced YOLO model)
532
+ - **🎨 Inpainting**: SDXL for photorealistic results
533
+ - **⏱️ Processing**: 30-90 seconds depending on complexity
534
+ - **🔧 Fallback**: Multiple YOLO models for reliability
535
+ - **Token Required**: HF token needed for API access
536
  """)
537
 
538
  if __name__ == "__main__":