kj03 committed on
Commit
5ffa482
·
verified ·
1 Parent(s): b892f74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -116
app.py CHANGED
@@ -1,122 +1,39 @@
 
1
  from transformers import DetrImageProcessor, DetrForObjectDetection
2
- from PIL import Image, ImageDraw, ImageFont
3
  import torch
4
- import gradio as gr
5
- from gtts import gTTS
6
- import tempfile
7
- import time
8
- import threading
9
- import queue
10
 
11
- # Load model and processor
12
- model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", ignore_mismatched_sizes=True)
13
  processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
14
- labels = model.config.id2label
15
-
16
- # Speak text to mp3 file
17
- def speak_text_to_file(text):
18
- tts = gTTS(text)
19
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
20
- tts.save(temp_file.name)
21
- return temp_file.name
22
 
23
- # Generate fallback error image
24
- def generate_error_image(msg):
25
- img = Image.new('RGB', (640, 480), (40, 40, 40))
26
- draw = ImageDraw.Draw(img)
27
- try:
28
- font = ImageFont.truetype("arial.ttf", 20)
29
- except:
30
- font = ImageFont.load_default()
31
- draw.text((20, 50), msg, fill=(255, 0, 0), font=font)
32
- return img
33
-
34
- # Object detection logic
35
  def detect_objects(image):
36
- if image is None:
37
- error_msg = "⚠️ No image captured! Please ensure your webcam is working."
38
- audio_path = speak_text_to_file(error_msg)
39
- return generate_error_image(error_msg), error_msg, audio_path
40
-
41
- try:
42
- inputs = processor(images=image, return_tensors="pt")
43
- outputs = model(**inputs)
44
- target_size = torch.tensor([image.size[::-1]])
45
- results = processor.post_process_object_detection(
46
- outputs, target_sizes=target_size, threshold=0.5
47
- )[0]
48
-
49
- draw = ImageDraw.Draw(image)
50
- object_details = []
51
- for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
52
- box = [round(i, 2) for i in box.tolist()]
53
- label_name = labels[label.item()]
54
- confidence = round(score.item(), 2)
55
- if confidence > 0.5:
56
- draw.rectangle(box, outline="red", width=3)
57
- draw.text((box[0], box[1] - 20), f"{label_name} {confidence:.0%}", fill="red")
58
- object_details.append(f"{label_name} ({confidence:.0%})")
59
-
60
- summary = "Detected: " + ", ".join(object_details[:5]) if object_details else "No objects detected with high confidence."
61
- audio_path = speak_text_to_file(summary)
62
- return image, summary, audio_path
63
- except Exception as e:
64
- error_msg = f"⚠️ Error during detection: {str(e)}"
65
- return generate_error_image(error_msg), error_msg, speak_text_to_file(error_msg)
66
-
67
- # Shared state and queues
68
- detection_flag = {"active": False}
69
- image_state = {"latest": None}
70
- result_queue = queue.Queue()
71
-
72
- # Detection thread runs every 5 seconds
73
- def detection_loop():
74
- last_time = 0
75
- while True:
76
- if detection_flag["active"] and image_state["latest"]:
77
- now = time.time()
78
- if now - last_time >= 5:
79
- result = detect_objects(image_state["latest"])
80
- result_queue.put(result)
81
- last_time = now
82
- time.sleep(1)
83
-
84
- threading.Thread(target=detection_loop, daemon=True).start()
85
-
86
- # Gradio UI
87
- with gr.Blocks(title="What's This? - Legacy Gradio Object Detector") as app:
88
- gr.Markdown("# 🔍 What's This? - Real-time Object Detector")
89
- gr.Markdown("Detects objects in webcam feed and reads them out loud every 5 seconds.")
90
-
91
- webcam = gr.Image(sources=["webcam"], type="pil", streaming=True, label="Live Webcam")
92
- output_img = gr.Image(type="pil", label="Detected Image")
93
- output_text = gr.Textbox(label="Detected Objects")
94
- audio_output = gr.Audio(type="filepath", label="Spoken Summary", autoplay=True)
95
- status = gr.Textbox(value="Detection paused", label="Status", interactive=False)
96
-
97
- start_btn = gr.Button("Start Detection", variant="primary")
98
- stop_btn = gr.Button("Stop Detection", variant="secondary")
99
-
100
- # Webcam frame update triggers result check
101
- def update_and_poll(image):
102
- image_state["latest"] = image
103
- if not result_queue.empty():
104
- img, txt, aud = result_queue.get()
105
- return img, txt, aud, "Detection running"
106
- return gr.update(), gr.update(), gr.update(), gr.update()
107
-
108
- webcam.change(update_and_poll, inputs=webcam, outputs=[output_img, output_text, audio_output, status])
109
-
110
- # Start and stop buttons toggle detection
111
- def start_detection():
112
- detection_flag["active"] = True
113
- return "Detection started"
114
-
115
- def stop_detection():
116
- detection_flag["active"] = False
117
- return "Detection paused"
118
-
119
- start_btn.click(start_detection, outputs=status)
120
- stop_btn.click(stop_detection, outputs=status)
121
-
122
- app.queue().launch()
 
1
+ import gradio as gr
2
  from transformers import DetrImageProcessor, DetrForObjectDetection
 
3
  import torch
4
+ from PIL import Image, ImageDraw
 
 
 
 
 
5
 
6
+ # Load pre-trained model and processor
 
7
  processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
8
+ model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
 
 
 
 
 
 
 
9
 
10
+ # Object detection function
 
 
 
 
 
 
 
 
 
 
 
11
  def detect_objects(image):
12
+ # Convert image and run model
13
+ inputs = processor(images=image, return_tensors="pt")
14
+ outputs = model(**inputs)
15
+
16
+ # Get outputs
17
+ target_sizes = torch.tensor([image.size[::-1]]) # PIL: (W, H) -> expected (H, W)
18
+ results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
19
+
20
+ # Draw boxes on the image
21
+ draw = ImageDraw.Draw(image)
22
+ for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
23
+ box = [round(i, 2) for i in box.tolist()]
24
+ draw.rectangle(box, outline="red", width=3)
25
+ draw.text((box[0], box[1]), f"{model.config.id2label[label.item()]}: {round(score.item(), 3)}", fill="red")
26
+
27
+ return image
28
+
29
# Build the Gradio interface.
#
# NOTE(review): this uses the ``sources=[...]`` / ``streaming=True`` form of
# gr.Image that the previous revision of this app used; confirm it matches
# the Gradio version pinned for the deployment.
#
# - "webcam" is the webcam source name ("camera" is not a recognised value).
# - type="pil" makes Gradio hand detect_objects a PIL image, which is what
#   it draws on.
# - live=True is an Interface option (re-run on every input change), not an
#   Image option.
demo = gr.Interface(
    fn=detect_objects,
    inputs=gr.Image(sources=["webcam"], type="pil", streaming=True),
    outputs=gr.Image(type="pil"),
    title="Real-Time Object Detection",
    description="Open webcam and detect objects using facebook/detr-resnet-50",
    live=True,
)

if __name__ == "__main__":
    demo.launch()