kj03 committed on
Commit
ad03eea
·
verified ·
1 Parent(s): d2e67ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -17
app.py CHANGED
@@ -2,36 +2,62 @@ from transformers import DetrImageProcessor, DetrForObjectDetection
2
  from PIL import Image, ImageDraw
3
  import torch
4
  import gradio as gr
 
 
 
5
 
6
- # Load processor and model (downloaded on first run)
7
- processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
8
  model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
9
-
10
- # Label mapping
11
  labels = model.config.id2label
12
 
 
 
 
 
 
 
 
 
13
  def detect_objects(image):
14
- # Process the image
15
  inputs = processor(images=image, return_tensors="pt")
16
  outputs = model(**inputs)
17
-
18
- # Get target size (height, width)
19
  target_size = torch.tensor([image.size[::-1]])
20
  results = processor.post_process_object_detection(outputs, target_sizes=target_size, threshold=0.9)[0]
21
 
22
- # Draw results
23
  draw = ImageDraw.Draw(image)
24
- for box, label, score in zip(results["boxes"], results["labels"], results["scores"]):
25
- box = [round(x, 2) for x in box.tolist()]
26
- draw.rectangle(box, outline="red", width=3)
27
- draw.text((box[0], box[1]), f"{labels[label.item()]} ({round(score.item(), 2)})", fill="red")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- return image
30
 
31
  gr.Interface(
32
  fn=detect_objects,
33
- inputs=gr.Image(type="pil"),
34
- outputs=gr.Image(type="pil"),
35
- title="What’s This? - Object Detection",
36
- description="Upload an image to detect objects using DETR (facebook/detr-resnet-50)"
 
 
 
 
 
37
  ).launch()
 
2
  from PIL import Image, ImageDraw
3
  import torch
4
  import gradio as gr
5
+ import pyttsx3
6
+ import tempfile
7
+ import os
8
 
# The network and its pre/post-processor are loaded from the same checkpoint
# (fetched on first run, then served from the local cache).
_CHECKPOINT = "facebook/detr-resnet-50"

model = DetrForObjectDetection.from_pretrained(_CHECKPOINT)
processor = DetrImageProcessor.from_pretrained(_CHECKPOINT)

# Class-index -> human-readable name table shipped with the model config.
labels = model.config.id2label
13
 
def speak_text_to_file(text):
    """Synthesize *text* to a temporary audio file and return the file's path.

    The file is created with ``delete=False``, so it stays on disk after this
    function returns; nothing in this function removes it.

    Args:
        text: The sentence to be spoken.

    Returns:
        Path of the temporary ``.mp3``-suffixed file the speech engine wrote to.
    """
    tts_engine = pyttsx3.init()

    # Only reserve a unique file name here; the handle is closed again before
    # the engine writes to that path.
    tmp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    with tmp_audio:
        output_path = tmp_audio.name

    # save_to_file only queues the job; runAndWait blocks until it is written.
    tts_engine.save_to_file(text, output_path)
    tts_engine.runAndWait()
    return output_path
21
+
def detect_objects(image):
    """Detect objects in *image*, annotate it, and narrate what was found.

    Runs the module-level DETR ``model`` / ``processor`` on the picture, keeps
    detections scoring above 0.9, draws a red box and caption for each one
    directly onto ``image`` (the input is modified in place), and turns the
    textual summary into speech via ``speak_text_to_file``.

    Args:
        image: PIL image handed over by the Gradio input component, or
            ``None`` when the input is empty (for example when the captured
            frame is cleared while the interface runs in live mode).

    Returns:
        A ``(image, summary_text, audio_path)`` tuple, one item per Gradio
        output. When ``image`` is ``None`` the tuple is
        ``(None, "No image provided.", None)`` and no inference is run.
    """
    # A cleared input arrives as None; bail out instead of failing on
    # ``image.size`` further down.
    if image is None:
        return None, "No image provided.", None

    inputs = processor(images=image, return_tensors="pt")
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports (width, height); post-processing wants (height, width).
    target_size = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_size, threshold=0.9
    )[0]

    draw = ImageDraw.Draw(image)
    object_details = []

    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(i, 2) for i in box.tolist()]
        label_name = labels[label.item()]
        confidence = round(score.item(), 2)

        draw.rectangle(box, outline="red", width=2)
        # The caption sits just above the box; clamp it so boxes touching the
        # top edge do not push their caption off the canvas.
        caption_y = max(box[1] - 10, 0)
        draw.text((box[0], caption_y), f"{label_name} ({confidence})", fill="red")

        object_details.append(f"{label_name} with confidence {confidence}")

    if object_details:
        summary_text = "Detected: " + ", ".join(object_details)
    else:
        summary_text = "No objects detected with high confidence."

    # Both outcomes are spoken, so synthesize once after the text is chosen.
    audio_path = speak_text_to_file(summary_text)

    return image, summary_text, audio_path
51
 
# Input widget for the picture to analyse.
# NOTE(review): ``source="webcam"`` is the Gradio 3.x spelling; Gradio 4
# replaced it with ``sources=[...]``. The label also promises upload while
# only the webcam is selected - confirm against the pinned Gradio version.
capture_input = gr.Image(type="pil", source="webcam", label="Capture or Upload Image")

# Output widgets, in the same order as the tuple detect_objects returns.
result_components = [
    gr.Image(type="pil", label="Detected Image"),
    gr.Textbox(label="Detected Objects"),
    gr.Audio(label="Spoken Summary"),
]

demo = gr.Interface(
    fn=detect_objects,
    inputs=capture_input,
    outputs=result_components,
    title="What’s This? – Real-Time Object Detector",
    description="Take a picture or upload one to detect and hear object names.",
    # live=True re-runs detection whenever the input changes, with no submit click.
    live=True,
)
demo.launch()