Reverse committed on
Commit
c45ce4a
·
verified ·
1 Parent(s): ce66404

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -298
app.py CHANGED
@@ -28,129 +28,6 @@ def load_flux_model():
28
  flux_pipe = None
29
 
30
  # Initialize object detection using proven working models
31
class AdvancedObjectDetector:
    """Object detection backed by the Hugging Face Inference API.

    Tries a primary hosted model first and then a fixed list of fallback
    models until one returns a usable result.
    """

    def __init__(self):
        # Primary model: YOLOS-small (proven working on the HF Inference API).
        self.api_url = "https://api-inference.huggingface.co/models/hustvl/yolos-small"
        # Fallback models in order of preference (all tested and working):
        self.fallback_models = [
            "https://api-inference.huggingface.co/models/facebook/detr-resnet-50",
            "https://api-inference.huggingface.co/models/hustvl/yolos-tiny",
            "https://api-inference.huggingface.co/models/microsoft/DiNAT-Large-ImageNet-1K",
            "https://api-inference.huggingface.co/models/google/owlvit-base-patch32",
        ]

    def _post_with_retry(self, model_url, headers, payload):
        """POST once; on HTTP 503 (model still loading) wait 15 s and retry
        a single time. Returns the final response object."""
        import time

        response = requests.post(model_url, headers=headers, json=payload, timeout=45)
        if response.status_code == 503:
            # Model is loading, wait and retry once
            time.sleep(15)
            response = requests.post(model_url, headers=headers, json=payload, timeout=45)
        return response

    def detect(self, image, hf_token=None):
        """Run object detection on a PIL image.

        Args:
            image: PIL image to analyse (saved as JPEG for transport).
            hf_token: optional HF API token; falls back to the HF_TOKEN /
                HUGGINGFACE_HUB_TOKEN environment variables.

        Returns:
            A non-empty list of detection dicts from the first model that
            answers successfully, or [] when a model replies with a
            non-error dict payload.

        Raises:
            Exception: when no token is available or every model fails.
        """
        import base64

        # Try multiple ways to get HF token
        token = hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
        if not token:
            raise Exception("HF Token required. Please set HF_TOKEN in Space secrets or environment variables")

        headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }

        # Convert PIL image to base64 string
        img_buffer = io.BytesIO()
        image.save(img_buffer, format='JPEG', quality=95)
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")
        payload = {"inputs": img_base64}

        # Try main model first, then fallbacks
        for model_url in [self.api_url] + self.fallback_models:
            try:
                response = self._post_with_retry(model_url, headers, payload)

                if response.status_code == 200:
                    result = response.json()
                    if isinstance(result, list) and len(result) > 0:
                        print(f"✅ Successfully used model: {model_url.split('/')[-1]}")
                        return result
                    elif isinstance(result, dict) and 'error' not in result:
                        return []
                elif response.status_code == 503:
                    print(f"Model {model_url.split('/')[-1]} is loading...")
                else:
                    print(f"Model {model_url.split('/')[-1]} failed with status {response.status_code}: {response.text[:200]}")

                # If this model failed, try next one
                continue

            except requests.exceptions.Timeout:
                print(f"Timeout with model {model_url}, trying next...")
                continue
            except requests.exceptions.RequestException as e:
                print(f"Network error with model {model_url}: {str(e)}, trying next...")
                continue

        # If all models failed
        raise Exception("All object detection models are currently unavailable. This usually means:\n" +
                        "1. Models are loading (wait 2-3 minutes and try again)\n" +
                        "2. High API traffic - try again in a few minutes\n" +
                        "3. Check your HF token is valid and has sufficient quota")


object_detector = AdvancedObjectDetector()
116
-
117
- # Extended object class names including common variations and synonyms
118
# Extended object class names including common variations and synonyms,
# grouped by category and flattened into one lookup list.
_PEOPLE = [
    'person', 'people', 'human', 'man', 'woman', 'child', 'baby', 'face', 'head',
]
_ANIMALS = [
    'cat', 'dog', 'bird', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    'lion', 'tiger', 'monkey', 'rabbit', 'mouse', 'rat', 'pig', 'goat', 'deer', 'fox',
]
_VEHICLES = [
    'car', 'truck', 'bus', 'motorcycle', 'bicycle', 'bike', 'airplane', 'plane', 'boat',
    'ship', 'train', 'van', 'taxi', 'ambulance', 'fire truck', 'police car',
]
_HOUSEHOLD = [
    'chair', 'table', 'couch', 'sofa', 'bed', 'desk', 'shelf', 'cabinet', 'drawer',
    'tv', 'television', 'laptop', 'computer', 'monitor', 'phone', 'mobile', 'tablet',
]
_FOOD_AND_DRINK = [
    'bottle', 'cup', 'glass', 'bowl', 'plate', 'fork', 'knife', 'spoon', 'banana', 'apple',
    'orange', 'pizza', 'sandwich', 'cake', 'donut', 'hot dog', 'hamburger', 'coffee',
]
_SPORTS = [
    'ball', 'football', 'basketball', 'tennis ball', 'baseball', 'soccer ball',
    'skateboard', 'surfboard', 'skis', 'bicycle', 'kite', 'frisbee',
]
_CLOTHING = [
    'hat', 'cap', 'glasses', 'sunglasses', 'bag', 'backpack', 'handbag', 'purse',
    'umbrella', 'tie', 'shoe', 'boot', 'shirt', 'jacket', 'coat',
]
_TOOLS = [
    'scissors', 'hammer', 'screwdriver', 'knife', 'pen', 'pencil', 'book', 'paper',
    'clock', 'watch', 'key', 'remote', 'controller', 'camera', 'microphone',
]
_OUTDOOR = [
    'tree', 'flower', 'plant', 'grass', 'rock', 'stone', 'mountain', 'cloud', 'sun',
    'bench', 'sign', 'pole', 'fence', 'gate', 'building', 'house', 'window', 'door',
]

COMMON_OBJECTS = (
    _PEOPLE + _ANIMALS + _VEHICLES + _HOUSEHOLD + _FOOD_AND_DRINK
    + _SPORTS + _CLOTHING + _TOOLS + _OUTDOOR
)
154
 
155
  def fuzzy_match_object(user_input, detected_labels):
156
  """
@@ -206,97 +83,11 @@ def fuzzy_match_object(user_input, detected_labels):
206
 
207
  return matches
208
 
209
def detect_objects(image, target_object, confidence_threshold, hf_token=None):
    """
    Detect any object in the image using advanced detection models and return bounding boxes.

    Args:
        image: PIL image to search.
        target_object: user-supplied object name (fuzzy-matched against labels).
        confidence_threshold: minimum detection score to keep.
        hf_token: optional HF API token forwarded to the detector.

    Returns:
        A list of detection dicts whose 'box' coordinates are integer pixel
        values clamped to the image bounds; [] when nothing usable is found.

    Raises:
        gr.Error: on missing input or any detection failure.
    """
    # Validate BEFORE the try block so this user-facing error is not caught
    # below and double-wrapped as "Object detection failed: Please enter...".
    if not target_object or not target_object.strip():
        raise gr.Error("Please enter an object name to detect and remove")

    try:
        # Use advanced detection for object detection
        results = object_detector.detect(image, hf_token)

        if not results or not isinstance(results, list):
            return []

        # Apply confidence threshold first
        filtered_detections = [
            d for d in results
            if isinstance(d, dict) and d.get('score', 0) >= confidence_threshold
        ]

        # Use fuzzy matching to find target objects
        target_detections = fuzzy_match_object(target_object, filtered_detections)

        # Process and validate bounding boxes
        valid_detections = []
        image_width, image_height = image.size

        for detection in target_detections:
            box = detection.get('box', {})
            if not box or not all(k in box for k in ('xmin', 'ymin', 'xmax', 'ymax')):
                continue

            xmin, ymin = box['xmin'], box['ymin']
            xmax, ymax = box['xmax'], box['ymax']

            # Handle normalized coordinates (0-1 range): scale to pixels.
            if xmax <= 1.0 and ymax <= 1.0:
                xmin = int(xmin * image_width)
                ymin = int(ymin * image_height)
                xmax = int(xmax * image_width)
                ymax = int(ymax * image_height)

            # Clamp coordinates to the image bounds and keep them ordered.
            xmin = max(0, min(int(xmin), image_width))
            ymin = max(0, min(int(ymin), image_height))
            xmax = max(xmin, min(int(xmax), image_width))
            ymax = max(ymin, min(int(ymax), image_height))

            # Only keep boxes with a positive area.
            if xmax > xmin and ymax > ymin:
                detection_copy = detection.copy()
                detection_copy['box'] = {
                    'xmin': xmin, 'ymin': ymin,
                    'xmax': xmax, 'ymax': ymax,
                }
                valid_detections.append(detection_copy)

        return valid_detections

    except gr.Error:
        # Already a user-facing error: propagate untouched.
        raise
    except Exception as e:
        print(f"Detection error: {str(e)}")
        raise gr.Error(f"Object detection failed: {str(e)}")
273
 
274
def create_mask_from_detections(image, detections, mask_expansion=10):
    """
    Create a binary mask from object detections with smart expansion.

    White (255) pixels mark regions to inpaint; everything else stays black.
    """
    width, height = image.size
    mask = Image.new('L', (width, height), 0)  # all-black starting mask
    draw = ImageDraw.Draw(mask)

    for det in detections:
        b = det['box']

        # Expansion adapts to object size: 10% of the shorter box side,
        # clamped between 5 px and the caller-supplied maximum.
        short_side = min(b['xmax'] - b['xmin'], b['ymax'] - b['ymin'])
        grow = min(mask_expansion, max(5, int(short_side * 0.1)))

        # Grow the box, keeping it inside the image.
        left = max(0, b['xmin'] - grow)
        top = max(0, b['ymin'] - grow)
        right = min(width, b['xmax'] + grow)
        bottom = min(height, b['ymax'] + grow)

        # Paint the inpaint region white.
        draw.rectangle([left, top, right, bottom], fill=255)

    return mask
300
 
301
  @spaces.GPU
302
  def flux_inpainting(image, object_name, guidance_scale=2.5, steps=28):
@@ -334,19 +125,9 @@ def flux_inpainting(image, object_name, guidance_scale=2.5, steps=28):
334
  print(f"FLUX inpainting error: {str(e)}")
335
  return None, False
336
 
337
def create_mask_overlay(image, mask):
    """Create a visualization showing the mask overlay on the original image.

    Returns an RGB copy of *image* with masked regions tinted
    semi-transparent red.
    """
    # NOTE: the original also computed an unused `mask_rgba = mask.convert('RGBA')`;
    # that dead local has been removed.
    base = image.convert('RGBA')
    red_tint = Image.new('RGBA', image.size, (255, 0, 0, 100))
    transparent = Image.new('RGBA', image.size, (0, 0, 0, 0))
    # Keep the red tint only where the mask is white, then lay it over the image.
    tinted_regions = Image.composite(red_tint, transparent, mask)
    return Image.alpha_composite(base, tinted_regions).convert('RGB')
347
 
348
  @spaces.GPU
349
- def remove_objects(image, object_name, confidence_threshold, mask_expansion, guidance_scale, steps, hf_token):
350
  """
351
  Main function to remove any specified object using advanced detection + FLUX inpainting
352
  """
@@ -358,7 +139,7 @@ def remove_objects(image, object_name, confidence_threshold, mask_expansion, gui
358
  raise gr.Error("Please enter the name of the object you want to remove")
359
 
360
  # Try to get token from multiple sources
361
- token = hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
362
  if not token:
363
  raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
364
 
@@ -367,20 +148,14 @@ def remove_objects(image, object_name, confidence_threshold, mask_expansion, gui
367
  result_image, flux_success = flux_inpainting(image, object_name, guidance_scale, steps)
368
 
369
  if flux_success and result_image:
370
- detected_labels = [d.get('label', 'unknown') for d in detections]
371
- status_msg = f"✅ Successfully removed {len(detections)} '{object_name}' object(s)\n"
372
- status_msg += f"🎯 Detected as: {', '.join(detected_labels)}\n"
373
- status_msg += f"🚀 Used: FLUX.1 Kontext for professional-quality removal\n"
374
  status_msg += f"⚙️ Settings: Guidance={guidance_scale}, Steps={steps}"
375
- return result_image, mask, status_msg
376
  else:
377
  # Fallback: show detection areas
378
- result_image = create_mask_overlay(image, mask)
379
- status_msg = f"⚠️ FLUX inpainting failed, but detection was successful\n"
380
- status_msg += f"🎯 Found {len(detections)} '{object_name}' object(s)\n"
381
- status_msg += f"📍 Showing detected areas in red overlay\n"
382
  status_msg += f"💡 Try adjusting guidance scale or steps, or check GPU availability"
383
- return result_image, mask, status_msg
384
 
385
  except Exception as e:
386
  return image, None, f"❌ Error: {str(e)}"
@@ -428,23 +203,6 @@ with gr.Blocks(
428
  )
429
 
430
  with gr.Accordion("⚙️ Advanced Settings", open=False):
431
- confidence_threshold = gr.Slider(
432
- minimum=0.1,
433
- maximum=1.0,
434
- value=0.3,
435
- step=0.05,
436
- label="🎚️ Detection Confidence",
437
- info="Lower = more detections, higher = fewer but more confident"
438
- )
439
-
440
- mask_expansion = gr.Slider(
441
- minimum=0,
442
- maximum=50,
443
- value=20,
444
- step=5,
445
- label="📏 Mask Expansion (pixels)",
446
- info="Expand mask around detected objects for debugging"
447
- )
448
 
449
  guidance_scale = gr.Slider(
450
  minimum=1.0,
@@ -490,63 +248,13 @@ with gr.Blocks(
490
  inputs=[
491
  input_image,
492
  object_name,
493
- confidence_threshold,
494
- mask_expansion,
495
  guidance_scale,
496
  steps,
497
  ],
498
  outputs=[output_image, status_text]
499
  )
500
 
501
- # Instructions and examples
502
- with gr.Row():
503
- with gr.Column():
504
- gr.Markdown("""
505
- ## 📚 Instructions
506
-
507
- 1. **Upload an image** containing objects you want to remove
508
- 2. **Enter ANY object name** in the text box - no restrictions!
509
- 3. **Adjust detection settings** if needed:
510
- - **Confidence**: Start with 0.3, increase if too many false detections
511
- - **Mask expansion**: For debugging - shows detection areas
512
- 4. **Fine-tune FLUX settings**:
513
- - **Guidance Scale**: 2.5 is optimal for most cases
514
- - **Steps**: 28 gives good quality/speed balance
515
- 5. **Click "Remove Objects"** and wait for professional AI processing
516
-
517
- ### 💡 Smart Object Recognition:
518
- - **Handles variations**: "car" = "vehicle" = "automobile"
519
- - **Plural support**: "person" matches "people"
520
- - **Common synonyms**: "phone" = "mobile" = "smartphone"
521
- - **Fuzzy matching**: Partial name matches work too!
522
- """)
523
 
524
- with gr.Column():
525
- gr.Markdown("""
526
- ## 🎯 What Can Be Removed?
527
-
528
- **✅ ANY Object You Can Think Of!**
529
-
530
- **Popular Examples:**
531
- - **People**: person, human, man, woman, child, face
532
- - **Animals**: dog, cat, bird, horse, any animal name
533
- - **Vehicles**: car, truck, bike, plane, boat, motorcycle
534
- - **Objects**: bottle, bag, phone, chair, table, sign
535
- - **Nature**: tree, flower, rock, cloud, mountain
536
- - **And literally thousands more!**
537
-
538
- ### ⚡ FLUX.1 Kontext Advantages:
539
- - **🎨 Professional Quality**: State-of-the-art contextual editing
540
- - **🧠 Intelligent Fill**: Understands scene context and lighting
541
- - **⚡ GPU Accelerated**: Fast processing with high quality
542
- - **🎯 Precise Control**: Fine-tunable guidance and steps
543
- - **🔧 No API Limits**: Runs locally without external dependencies
544
-
545
- **System Requirements:**
546
- - GPU-enabled environment (automatically handled in Spaces)
547
- - HF token for object detection API access
548
- - Processing time: 30-90 seconds depending on image size
549
- """)
550
 
551
  if __name__ == "__main__":
552
  demo.launch()
 
28
  flux_pipe = None
29
 
30
  # Initialize object detection using proven working models
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  def fuzzy_match_object(user_input, detected_labels):
33
  """
 
83
 
84
  return matches
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  except Exception as e:
88
  print(f"Detection error: {str(e)}")
89
  raise gr.Error(f"Object detection failed: {str(e)}")
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  @spaces.GPU
93
  def flux_inpainting(image, object_name, guidance_scale=2.5, steps=28):
 
125
  print(f"FLUX inpainting error: {str(e)}")
126
  return None, False
127
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  @spaces.GPU
130
+ def remove_objects(image, object_name, guidance_scale, steps):
131
  """
132
  Main function to remove any specified object using advanced detection + FLUX inpainting
133
  """
 
139
  raise gr.Error("Please enter the name of the object you want to remove")
140
 
141
  # Try to get token from multiple sources
142
+ token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
143
  if not token:
144
  raise gr.Error("Please provide your Hugging Face token or set HF_TOKEN in Space secrets")
145
 
 
148
  result_image, flux_success = flux_inpainting(image, object_name, guidance_scale, steps)
149
 
150
  if flux_success and result_image:
151
+ status_msg = f"✅ Successfully removed '{object_name}' object(s)\n"
 
 
 
152
  status_msg += f"⚙️ Settings: Guidance={guidance_scale}, Steps={steps}"
153
+ return result_image, status_msg
154
  else:
155
  # Fallback: show detection areas
156
+ status_msg = f"⚠️ Inpainting failed, but detection was successful\n"
 
 
 
157
  status_msg += f"💡 Try adjusting guidance scale or steps, or check GPU availability"
158
+ return result_image, status_msg
159
 
160
  except Exception as e:
161
  return image, None, f"❌ Error: {str(e)}"
 
203
  )
204
 
205
  with gr.Accordion("⚙️ Advanced Settings", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  guidance_scale = gr.Slider(
208
  minimum=1.0,
 
248
  inputs=[
249
  input_image,
250
  object_name,
 
 
251
  guidance_scale,
252
  steps,
253
  ],
254
  outputs=[output_image, status_text]
255
  )
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  if __name__ == "__main__":
260
  demo.launch()