Reaper200 committed on
Commit
d61b4cb
·
verified ·
1 Parent(s): a63324d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -50
app.py CHANGED
@@ -1,52 +1,79 @@
1
  import streamlit as st
2
- from transformers import DetrForObjectDetection, DetrImageProcessor
3
  from PIL import Image
4
- import torch
5
- import matplotlib.pyplot as plt
6
- import matplotlib.patches as patches
7
-
8
- # Load the model and processor
9
- model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
10
- processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
11
-
12
- st.title("Context-Aware Object Detection")
13
- st.write("Upload an image to detect objects with contextual awareness.")
14
-
15
- # Upload an image
16
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
17
-
18
- if uploaded_file is not None:
19
- # Open the uploaded image
20
- image = Image.open(uploaded_file)
21
- st.image(image, caption="Uploaded Image", use_column_width=True)
22
-
23
- # Preprocess the image and make predictions
24
- inputs = processor(images=image, return_tensors="pt")
25
- outputs = model(**inputs)
26
-
27
- # Get logits and bounding boxes
28
- logits = outputs.logits.softmax(-1)[0]
29
- boxes = outputs.pred_boxes[0]
30
-
31
- # Set a confidence threshold for displaying boxes
32
- threshold = 0.9
33
- labels = processor.tokenizer.convert_ids_to_tokens(logits.argmax(-1))
34
- scores = logits.max(-1).values
35
-
36
- # Display the image with bounding boxes
37
- fig, ax = plt.subplots(1)
38
- ax.imshow(image)
39
-
40
- # Plot each detected object if it meets the confidence threshold
41
- for score, label, box in zip(scores, labels, boxes):
42
- if score > threshold:
43
- # Convert bounding box coordinates to absolute pixel values
44
- x, y, w, h = box * torch.tensor([image.width, image.height, image.width, image.height])
45
- x0, y0 = x - w / 2, y - h / 2
46
-
47
- # Draw the bounding box
48
- rect = patches.Rectangle((x0, y0), w, h, linewidth=2, edgecolor='r', facecolor='none')
49
- ax.add_patch(rect)
50
- ax.text(x0, y0, f"{label}: {score:.2f}", color='red', fontsize=8, weight='bold')
51
-
52
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
  from PIL import Image
4
+ import time
5
+ from gtts import gTTS
6
+ import os
7
+
8
# Mock function to simulate image capture
def capture_image():
    """Pretend to grab a frame from a camera and return its file path.

    Returns:
        str: path to a placeholder image file.
    """
    st.write("Simulating image capture...")
    time.sleep(1)  # simulate shutter/processing latency
    # Placeholder path; swap in a real capture source for production use.
    return "sample_image.jpg"
13
+
14
# Mock object detection function
def detect_objects(image_path):
    """Return a canned list of object labels for *image_path* (simulation).

    Parameters:
        image_path: path to the image to "analyze" (unused by the mock).

    Returns:
        list[str]: fixed stand-in for a real detector's label output.
    """
    st.write("Detecting objects in the image...")
    canned_labels = ["table", "chair", "lamp"]
    return canned_labels
19
+
20
# Mock context-aware filter function
def filter_relevant_objects(detected_objects, setting):
    """Keep only the objects considered relevant for *setting* (simulated rules).

    Parameters:
        detected_objects: list of label strings from detection.
        setting: scene context, e.g. "indoor" or "outdoor".

    Returns:
        list: the filtered labels (order and duplicates preserved).
    """
    st.write(f"Filtering relevant objects for setting: {setting}")
    # Guard clause: anything other than "indoor" passes through unfiltered.
    if setting != "indoor":
        return detected_objects
    indoor_whitelist = ("table", "lamp")
    kept = []
    for label in detected_objects:
        if label in indoor_whitelist:
            kept.append(label)
    return kept
27
+
28
# Mock summarization function
def generate_summary(relevant_objects):
    """Build a one-sentence natural-language summary of the relevant objects.

    Parameters:
        relevant_objects: list of object-name strings.

    Returns:
        str: sentence stating the object count and listing the names.
    """
    st.write("Generating summary for relevant objects...")
    count = len(relevant_objects)
    # Bug fix: the original hard-coded "an" before the count, producing
    # ungrammatical text like "This is an 3-item scene". Pick "a"/"an" by
    # how the number is pronounced: 8, 11, 18, 80-89, 800-899, ... take "an".
    article = "an" if str(count).startswith("8") or count in (11, 18) else "a"
    summary = f"This is {article} {count}-item scene including: {', '.join(relevant_objects)}."
    return summary
34
+
35
# Mock text-to-speech function
def text_to_speech(text):
    """Synthesize *text* to an MP3 via gTTS and embed an audio player.

    Parameters:
        text: the string to speak aloud.
    """
    st.write("Converting summary to speech...")
    speech = gTTS(text)
    # File lands in the app's working directory and is re-read by st.audio.
    speech.save("summary.mp3")
    st.audio("summary.mp3")
41
+
42
# Mock GPS navigation function
def get_distance_to_object(address):
    """Return a canned (distance, duration) pair for *address* (simulation).

    Parameters:
        address: free-form destination address (unused by the mock).

    Returns:
        tuple[str, str]: fixed ("5 km", "15 mins") stand-in values.
    """
    st.write(f"Calculating distance to address: {address}")
    distance, duration = "5 km", "15 mins"
    return distance, duration
47
+
48
# Streamlit app main function
def main():
    """Drive the mock pipeline end-to-end:
    capture -> detect -> filter -> summarize -> speak -> navigate.

    Statement order defines the Streamlit page layout, so the steps run
    top-to-bottom exactly as displayed.
    """
    st.title("Context-Aware Object Detection with Hugging Face")

    # Step 1: Capture Image (simulated)
    image_path = capture_image()
    st.image(image_path, caption="Captured Image")

    # Step 2: Detect Objects
    detected_objects = detect_objects(image_path)
    st.write(f"Detected Objects: {detected_objects}")

    # Step 3: Filter Relevant Objects
    setting = st.selectbox("Select Setting", ["indoor", "outdoor"], index=0)
    relevant_objects = filter_relevant_objects(detected_objects, setting)
    st.write(f"Relevant Objects: {relevant_objects}")

    # Step 4: Generate Summary
    summary = generate_summary(relevant_objects)
    st.write(f"Summary: {summary}")

    # Step 5: Convert Summary to Speech
    text_to_speech(summary)

    # Step 6: GPS Navigation (simulated)
    address = st.text_input("Enter Object's Address", "1600 Amphitheatre Parkway, Mountain View, CA")
    if st.button("Get Distance to Object"):
        distance, duration = get_distance_to_object(address)
        st.write(f"Distance to Object: {distance}, Duration: {duration}")


if __name__ == "__main__":
    main()