stpete2 committed
Commit b5cec06 · verified · 1 Parent(s): 86b7d4a

Create app.py

Files changed (1)
app.py +470 -0
app.py ADDED
@@ -0,0 +1,470 @@
import gradio as gr
import cv2
import numpy as np
import os
import sys
from ultralytics import YOLO
from PIL import Image
import time
from collections import Counter

print("Python version:", sys.version)
print("Gradio version:", gr.__version__)


class SimpleObjectDetector:
    def __init__(self):
        """Initialize YOLO11n general object detector"""
        self.model = None
        try:
            self.model = YOLO('yolo11n.pt')
            print("✅ YOLO11n model initialization complete")
            print("📦 Can detect 80 object classes: person, car, animals, etc.")
        except Exception as e:
            import traceback
            print(f"⚠️ Model initialization error: {e}")
            traceback.print_exc()
            print("🔄 Running in dummy mode")

    def detect(self, image, conf_threshold=0.25):
        """Run object detection on a single image"""
        if image is None:
            return None, []

        if self.model is None:
            # Dummy processing when the model failed to load
            result = image.copy()
            cv2.putText(result, "MODEL NOT FOUND", (50, 100),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            return result, []

        try:
            # Run inference
            results = self.model(image, conf=conf_threshold)

            detections = []
            if len(results) > 0:
                annotated = results[0].plot()

                # Collect class name and confidence for each detected box
                for box in results[0].boxes:
                    class_id = int(box.cls[0])
                    class_name = results[0].names[class_id]
                    confidence = float(box.conf[0])
                    detections.append({
                        'class': class_name,
                        'confidence': confidence
                    })

                return annotated, detections

            return image, []
        except Exception as e:
            print(f"Detection Error: {e}")
            return image, []
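
    # Note: ultralytics' results[0].plot() returns the annotated image as a
    # BGR numpy array (OpenCV convention), which is why process_image() below
    # converts the result back to RGB before handing it to Gradio for display.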

    def detect_video(self, video_path, conf_threshold=0.25, progress=gr.Progress()):
        """Process a video file frame by frame with object detection"""
        if video_path is None:
            return None, "Please upload a video"

        if self.model is None:
            return None, "❌ Model not loaded. Cannot process video."

        try:
            # Open video
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                return None, "❌ Failed to open video file"

            # Get video properties (fall back to sane values if metadata is missing)
            fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 1)

            # Create output video file
            output_path = "output_detected.mp4"
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            # Statistics
            all_detections = []
            frame_count = 0
            start_time = time.time()

            progress(0, desc="Starting video processing...")

            # Process each frame
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Run detection
                annotated_frame, detections = self.detect(frame, conf_threshold)

                # Write annotated frame to output
                out.write(annotated_frame)

                # Store detections
                all_detections.extend(detections)
                frame_count += 1

                # Update progress every 10 frames
                if frame_count % 10 == 0:
                    progress_pct = frame_count / total_frames
                    progress(progress_pct, desc=f"Processing frame {frame_count}/{total_frames}")

            # Release resources
            cap.release()
            out.release()

            processing_time = time.time() - start_time

            # Generate statistics
            if len(all_detections) > 0:
                class_counts = Counter([det['class'] for det in all_detections])

                result_text = "✅ Video Processing Complete!\n\n"
                result_text += "📊 Statistics:\n"
                result_text += f"- Total Frames: {frame_count}\n"
                result_text += f"- Total Detections: {len(all_detections)}\n"
                result_text += f"- Processing Time: {processing_time:.2f} seconds\n"
                result_text += f"- FPS: {frame_count / processing_time:.1f}\n\n"

                result_text += "🎯 Detected Objects (Total Count):\n"
                for obj_class, count in class_counts.most_common():
                    result_text += f"  • {obj_class.upper()}: {count}\n"

                result_text += f"\n💡 Average detections per frame: {len(all_detections) / frame_count:.1f}"
            else:
                result_text = f"❌ No objects detected in {frame_count} frames\n\n"
                result_text += "Try:\n- Lowering the confidence threshold\n- Using a different video"

            return output_path, result_text

        except Exception as e:
            import traceback
            error_msg = f"❌ Video processing error: {str(e)}\n\n"
            error_msg += traceback.format_exc()
            print(error_msg)
            return None, error_msg
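
    # Note: the 'mp4v' fourcc produces MPEG-4 Part 2 video, which some browsers
    # cannot play inline in the Gradio video player; if the processed video
    # fails to play, re-encoding the output to H.264 (e.g. with ffmpeg) is a
    # common workaround.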


# Create a single shared detector instance
detector = SimpleObjectDetector()


def process_image(image, conf_threshold):
    """Detect objects in an uploaded image and format the results"""
    if image is None:
        return None, "Please upload an image"

    # Convert from RGB (Gradio) to BGR (OpenCV)
    if len(image.shape) == 3:
        if image.shape[2] == 4:  # RGBA input: drop the alpha channel first
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Run object detection
    start_time = time.time()
    result, detections = detector.detect(image, conf_threshold)
    processing_time = time.time() - start_time

    # Convert back to RGB for display
    if result is not None:
        result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)

    # Format detection results
    if len(detections) > 0:
        result_text = f"✅ Detected {len(detections)} object(s):\n\n"
        for i, det in enumerate(detections, 1):
            result_text += f"{i}. {det['class'].upper()} - Confidence: {det['confidence'] * 100:.1f}%\n"
        result_text += f"\n⏱️ Processing time: {processing_time:.2f} seconds"
    else:
        result_text = "❌ No objects detected\n\nTry:\n- Adjusting the confidence threshold\n- Using a clearer image\n- Getting closer to objects"

    return result, result_text


def flip_image(image):
    """Flip image horizontally"""
    if image is None:
        return None

    if isinstance(image, Image.Image):
        image = np.array(image)

    # flipCode=1 mirrors around the vertical axis (left-right flip)
    return cv2.flip(image, 1)


def rotate_image(image, angle):
    """Rotate image by the specified angle in degrees"""
    if image is None:
        return None

    if isinstance(image, Image.Image):
        image = np.array(image)

    height, width = image.shape[:2]
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, rotation_matrix, (width, height))

    return rotated
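
# Note: warpAffine keeps the original canvas size, so rotating a non-square
# image by 90° clips its corners; computing a new output size from the
# rotation matrix would avoid this.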


def adjust_brightness_contrast(image, bright_val, contrast_val):
    """Adjust image brightness and contrast"""
    if image is None:
        return None

    if isinstance(image, Image.Image):
        image = np.array(image)

    # Work in [0, 1] float space: scale by contrast, then shift by brightness
    img_float = image.astype(np.float32) / 255.0
    adjusted = img_float * contrast_val + (bright_val - 1.0)
    adjusted = np.clip(adjusted, 0, 1)
    adjusted = (adjusted * 255).astype(np.uint8)

    return adjusted
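
# The adjustment above is output = contrast * input + (brightness - 1), applied
# to pixel values normalized to [0, 1]. Both sliders default to 1.0, which is
# the identity transform (1.0 * input + 0).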


def generate_test_image():
    """Generate test image with sample objects"""
    img = np.ones((480, 640, 3), dtype=np.uint8) * 230

    # Draw sample shapes
    cv2.rectangle(img, (100, 150), (200, 300), (50, 50, 200), -1)  # "car-like" shape
    cv2.circle(img, (400, 200), 50, (200, 50, 50), -1)  # circle

    cv2.putText(img, "TEST IMAGE", (200, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
    cv2.putText(img, "Click 'Detect Objects' to test", (150, 400),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

    return img
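
# Note: the synthetic shapes above exercise the pipeline end to end, but YOLO
# is unlikely to label plain rectangles and circles as any of its 80 COCO
# classes, so an empty detection result on this image is expected.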


# Create Gradio Interface
with gr.Blocks(title="YOLO11n Object Detection - Image & Video", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📱🔍 YOLO11n Object Detection - Image & Video")
    gr.Markdown("### Detect 80 types of objects in images and videos!")

    with gr.Tabs():
        # ===== IMAGE TAB =====
        with gr.Tab("📸 Image Detection"):
            with gr.Row():
                with gr.Column(scale=1):
                    # Instructions
                    gr.Markdown("""
                    ## 📋 How to Use:
                    1. **Upload** an image or **Take Photo** (mobile)
                    2. Adjust the **confidence threshold** if needed
                    3. Click **🚀 Detect Objects**

                    **Detectable Objects:**
                    - 🚗 Vehicles (car, truck, bus, motorcycle, bicycle)
                    - 🧍 People and body parts
                    - 🐕 Animals (dog, cat, bird, horse, etc.)
                    - ⚽ Sports equipment
                    - 🪑 Furniture and household items
                    - And 60+ more categories!
                    """)

                    # Image input
                    with gr.Group():
                        gr.Markdown("### 📸 Capture or Upload Image")
                        image_input = gr.Image(
                            label="Input Image",
                            type="numpy",
                            sources=["upload"],
                            interactive=True
                        )

                    # Confidence threshold
                    img_conf_slider = gr.Slider(
                        0.1, 0.9,
                        value=0.25,
                        step=0.05,
                        label="🎯 Confidence Threshold",
                        info="Lower = more detections (may include false positives)"
                    )

                    # Image manipulation controls
                    with gr.Accordion("🔄 Image Adjustments", open=False):
                        with gr.Row():
                            flip_btn = gr.Button("🪞 Flip", size="sm")
                            rotate_90_btn = gr.Button("↪️ Rotate 90°", size="sm")
                            rotate_180_btn = gr.Button("🔄 Rotate 180°", size="sm")

                        brightness = gr.Slider(0.5, 2.0, value=1.0, label="☀️ Brightness")
                        contrast = gr.Slider(0.5, 2.0, value=1.0, label="🎨 Contrast")

                with gr.Column(scale=1):
                    # Detection results
                    gr.Markdown("## 🔍 Detection Results")
                    output_image = gr.Image(label="Detected Objects", interactive=False)

                    # Detection button
                    detect_btn = gr.Button(
                        "🚀 Detect Objects",
                        variant="primary",
                        size="lg"
                    )

                    # Results text
                    results_text = gr.Textbox(
                        label="📊 Detection Details",
                        lines=10,
                        interactive=False
                    )

            # Test section
            with gr.Accordion("🧪 Test & Examples", open=False):
                with gr.Row():
                    test_btn = gr.Button("Generate Test Image")

                gr.Markdown("""
                **💡 Tips for Best Results:**
                - Use clear, well-lit photos
                - Ensure objects are not too far away
                - Avoid heavy shadows or blur
                - Try different confidence thresholds
                """)

        # ===== VIDEO TAB =====
        with gr.Tab("🎥 Video Detection"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("""
                    ## 📹 Video Object Detection

                    Upload a video file and detect objects in every frame!

                    **Supported formats:** MP4, AVI, MOV, MKV

                    **Note:** Processing may take time depending on video length.
                    For best performance, use videos under 1 minute.
                    """)

                    video_input = gr.Video(
                        label="📤 Upload Video",
                        sources=["upload"]
                    )

                    video_conf_slider = gr.Slider(
                        0.1, 0.9,
                        value=0.25,
                        step=0.05,
                        label="🎯 Confidence Threshold",
                        info="Lower = more detections"
                    )

                    process_video_btn = gr.Button(
                        "🎬 Process Video",
                        variant="primary",
                        size="lg"
                    )

                with gr.Column(scale=1):
                    gr.Markdown("## 📊 Processed Video & Statistics")

                    video_output = gr.Video(
                        label="Processed Video with Detections"
                    )

                    video_results_text = gr.Textbox(
                        label="📈 Video Statistics",
                        lines=15,
                        interactive=False
                    )

            with gr.Accordion("💡 Video Processing Tips", open=False):
                gr.Markdown("""
                **Optimization Tips:**
                - Higher confidence threshold = faster processing
                - Shorter videos = quicker results
                - Good lighting improves detection accuracy
                - A stable camera works better than shaky footage

                **What gets detected:**
                - Moving objects (cars, people, animals)
                - Static objects (furniture, signs, equipment)
                - Multiple objects simultaneously
                """)

    # ===== EVENT HANDLERS - IMAGE TAB =====
    test_btn.click(
        fn=generate_test_image,
        outputs=image_input
    )

    flip_btn.click(
        fn=flip_image,
        inputs=image_input,
        outputs=image_input
    )

    rotate_90_btn.click(
        fn=lambda img: rotate_image(img, 90),
        inputs=image_input,
        outputs=image_input
    )

    rotate_180_btn.click(
        fn=lambda img: rotate_image(img, 180),
        inputs=image_input,
        outputs=image_input
    )

    brightness.change(
        fn=lambda img, b, c: adjust_brightness_contrast(img, b, c) if img is not None else None,
        inputs=[image_input, brightness, contrast],
        outputs=image_input
    )

    contrast.change(
        fn=lambda img, b, c: adjust_brightness_contrast(img, b, c) if img is not None else None,
        inputs=[image_input, brightness, contrast],
        outputs=image_input
    )
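
    # Note: because the sliders write their result back into image_input, each
    # slider event re-applies the adjustment to the already-adjusted image, so
    # repeated changes compound rather than adjusting the original upload.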

    detect_btn.click(
        fn=process_image,
        inputs=[image_input, img_conf_slider],
        outputs=[output_image, results_text]
    )

    # ===== EVENT HANDLERS - VIDEO TAB =====
    process_video_btn.click(
        fn=detector.detect_video,
        inputs=[video_input, video_conf_slider],
        outputs=[video_output, video_results_text]
    )


if __name__ == "__main__":
    print("=" * 60)
    print("🚀 YOLO11n Object Detection - Image & Video Support")
    print("=" * 60)
    print("📦 Detects 80 object classes including:")
    print("   - People, vehicles, animals")
    print("   - Furniture, sports equipment")
    print("   - Electronics, food items, and more!")
    print("=" * 60)
    print("🖼️ Image Mode: Instant detection on photos")
    print("🎥 Video Mode: Frame-by-frame detection")
    print("=" * 60)
    print("🌐 Access via: http://localhost:7860")
    print("📱 Mobile: On the same network, open the computer's IP at port 7860")
    print("=" * 60)

    try:
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            debug=False,
            share=True,
            show_error=True,
            max_file_size="100MB"  # Increased to accommodate video files
        )
    except Exception as e:
        print(f"❌ Launch Error: {e}")
        print("\n🔧 Troubleshooting:")
        print("1. Try a different port: demo.launch(server_port=7861)")
        print("2. Check firewall settings")
        print("3. Ensure ultralytics is installed: pip install ultralytics")