Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,122 +1,39 @@
|
|
|
|
|
| 1 |
from transformers import DetrImageProcessor, DetrForObjectDetection
|
| 2 |
-
from PIL import Image, ImageDraw, ImageFont
|
| 3 |
import torch
|
| 4 |
-
import
|
| 5 |
-
from gtts import gTTS
|
| 6 |
-
import tempfile
|
| 7 |
-
import time
|
| 8 |
-
import threading
|
| 9 |
-
import queue
|
| 10 |
|
| 11 |
-
# Load model and processor
|
| 12 |
-
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", ignore_mismatched_sizes=True)
|
| 13 |
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
# Speak text to mp3 file
|
| 17 |
-
def speak_text_to_file(text):
    """Synthesize ``text`` with gTTS and return the path of the saved MP3.

    The temporary file is created with ``delete=False``, so it still exists
    after this function returns; removing it is left to the caller.

    Args:
        text: The text handed to ``gTTS``.

    Returns:
        The path of the ``.mp3`` file that was written.
    """
    tts = gTTS(text)
    # Only the unique path is needed. Close the handle immediately so the
    # descriptor is not leaked and the file is not held open while gTTS
    # writes to the same path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name
    tts.save(mp3_path)
    return mp3_path
|
| 22 |
|
| 23 |
-
# Build a placeholder image that shows an error message
|
| 24 |
-
def generate_error_image(msg):
    """Return a 640x480 dark-grey image with ``msg`` drawn on it in red.

    Args:
        msg: The text to draw at position (20, 50).

    Returns:
        A new RGB ``PIL.Image.Image``.
    """
    img = Image.new('RGB', (640, 480), (40, 40, 40))
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except (OSError, ImportError):
        # The font file is missing/unreadable or FreeType support is not
        # available: fall back to Pillow's built-in font. Catching only these
        # keeps KeyboardInterrupt/SystemExit from being swallowed.
        font = ImageFont.load_default()
    draw.text((20, 50), msg, fill=(255, 0, 0), font=font)
    return img
|
| 33 |
-
|
| 34 |
-
# Object detection logic
|
| 35 |
def detect_objects(image):
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
)
|
| 48 |
-
|
| 49 |
-
draw
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
error_msg = f"⚠️ Error during detection: {str(e)}"
|
| 65 |
-
return generate_error_image(error_msg), error_msg, speak_text_to_file(error_msg)
|
| 66 |
-
|
| 67 |
-
# Shared state and queues
|
| 68 |
-
detection_flag = {"active": False}
|
| 69 |
-
image_state = {"latest": None}
|
| 70 |
-
result_queue = queue.Queue()
|
| 71 |
-
|
| 72 |
-
# Detection thread runs every 5 seconds
|
| 73 |
-
def detection_loop():
    """Run detection on the latest frame at most once every 5 seconds.

    Loops forever, waking once per second. While ``detection_flag["active"]``
    is set and a frame is available in ``image_state["latest"]``, the frame is
    passed to ``detect_objects`` and the result is put on ``result_queue``.
    """
    last_time = None  # no detection has run yet, so the first one runs at once
    while True:
        # Read the shared frame exactly once per iteration so that the frame
        # that was checked is the frame that gets processed, even if another
        # thread replaces it in the meantime.
        frame = image_state["latest"]
        if detection_flag["active"] and frame is not None:
            # A monotonic clock keeps the 5-second throttle correct when the
            # system wall clock is adjusted.
            now = time.monotonic()
            if last_time is None or now - last_time >= 5:
                result_queue.put(detect_objects(frame))
                last_time = now
        time.sleep(1)
|
| 83 |
-
|
| 84 |
-
threading.Thread(target=detection_loop, daemon=True).start()
|
| 85 |
-
|
| 86 |
-
# Gradio UI
|
| 87 |
-
# Build the Blocks UI: a streaming webcam input, the three detection outputs
# (annotated image, text summary, spoken summary) and start/stop controls.
with gr.Blocks(title="What's This? - Legacy Gradio Object Detector") as app:
    gr.Markdown("# 🔍 What's This? - Real-time Object Detector")
    gr.Markdown("Detects objects in webcam feed and reads them out loud every 5 seconds.")

    webcam = gr.Image(sources=["webcam"], type="pil", streaming=True, label="Live Webcam")
    output_img = gr.Image(type="pil", label="Detected Image")
    output_text = gr.Textbox(label="Detected Objects")
    audio_output = gr.Audio(type="filepath", label="Spoken Summary", autoplay=True)
    status = gr.Textbox(value="Detection paused", label="Status", interactive=False)

    start_btn = gr.Button("Start Detection", variant="primary")
    stop_btn = gr.Button("Stop Detection", variant="secondary")

    # Webcam frame update triggers result check
    def update_and_poll(image):
        """Store the newest webcam frame and surface a finished result, if any.

        Returns the (image, text, audio, status) values of a queued detection
        result, or four no-op ``gr.update()`` values when nothing is queued.
        """
        image_state["latest"] = image
        try:
            # Non-blocking fetch: a separate empty() check followed by get()
            # could block this handler forever if another handler drained the
            # queue in between.
            img, txt, aud = result_queue.get_nowait()
        except queue.Empty:
            return gr.update(), gr.update(), gr.update(), gr.update()
        return img, txt, aud, "Detection running"

    webcam.change(update_and_poll, inputs=webcam, outputs=[output_img, output_text, audio_output, status])

    # Start and stop buttons toggle detection
    def start_detection():
        """Enable the background detection loop and report the new status."""
        detection_flag["active"] = True
        return "Detection started"

    def stop_detection():
        """Pause the background detection loop and report the new status."""
        detection_flag["active"] = False
        return "Detection paused"

    start_btn.click(start_detection, outputs=status)
    stop_btn.click(stop_detection, outputs=status)

app.queue().launch()
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
from transformers import DetrImageProcessor, DetrForObjectDetection
|
|
|
|
| 3 |
import torch
|
| 4 |
+
from PIL import Image, ImageDraw
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
# Load pre-trained model and processor
|
|
|
|
| 7 |
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
|
| 8 |
+
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
# Object detection function
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def detect_objects(image):
    """Detect objects in ``image`` with DETR and return an annotated copy.

    Every detection scoring above 0.9 is outlined in red and labelled with
    its class name and score.

    Args:
        image: A ``PIL.Image.Image`` or an array accepted by
            ``Image.fromarray``; ``None`` is also accepted.

    Returns:
        A new RGB ``PIL.Image.Image`` with the boxes drawn on it, or ``None``
        when ``image`` is ``None``. The input image itself is not modified.
    """
    if image is None:
        # Nothing to analyse (e.g. the input was cleared).
        return None
    if not isinstance(image, Image.Image):
        # The PIL-only calls below (.size, ImageDraw) need a PIL image.
        image = Image.fromarray(image)
    # Work on an RGB copy so the caller's frame is left untouched.
    annotated = image.convert("RGB")

    # Run the model; gradients are not needed for inference.
    inputs = processor(images=annotated, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Rescale predictions to the image size. PIL reports (W, H); the
    # post-processor expects (H, W).
    target_sizes = torch.tensor([annotated.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.9
    )[0]

    # Draw boxes on the image
    draw = ImageDraw.Draw(annotated)
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(coord, 2) for coord in box.tolist()]
        draw.rectangle(box, outline="red", width=3)
        draw.text((box[0], box[1]), f"{model.config.id2label[label.item()]}: {round(score.item(), 3)}", fill="red")

    return annotated
|
| 28 |
+
|
| 29 |
+
# Launch Gradio interface
|
| 30 |
+
# Gradio interface: stream webcam frames through detect_objects and show the
# annotated frame.
demo = gr.Interface(
    fn=detect_objects,
    # The webcam source is named "webcam"; type="pil" hands detect_objects a
    # PIL image, which is what its .size / ImageDraw calls require.
    inputs=gr.Image(sources=["webcam"], type="pil", streaming=True),
    outputs=gr.Image(type="pil"),
    title="Real-Time Object Detection",
    description="Open webcam and detect objects using facebook/detr-resnet-50",
    # `live` is an Interface option (re-run on input change), not an Image one.
    live=True,
)

if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|