# Reaper200's picture
# Update app.py
# 21cbf36 verified
# raw
# history blame
# 4.25 kB
import streamlit as st
from PIL import Image
import cv2
import numpy as np
from gtts import gTTS
import os
# Load pre-trained model and classes
def load_model():
    """Load the YOLOv3 network via OpenCV's DNN module.

    Returns:
        tuple: ``(net, output_layers)`` where ``net`` is the loaded
        ``cv2.dnn_Net`` and ``output_layers`` is the list of unconnected
        output-layer names needed for a forward pass.

    Raises:
        cv2.error: if "yolov3.weights" / "yolov3.cfg" are not present in
        the working directory.
    """
    net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")  # Ensure these files are in the working directory
    layer_names = net.getLayerNames()
    # Bug fix: OpenCV >= 4.5.4 returns a flat 1-D int array from
    # getUnconnectedOutLayers(), so the legacy ``i[0] - 1`` indexing raises
    # IndexError. np.ravel accepts both the old Nx1 and new 1-D shapes.
    output_layers = [layer_names[i - 1] for i in np.ravel(net.getUnconnectedOutLayers())]
    return net, output_layers
# Object detection function
def detect_objects(image, net, output_layers, conf_threshold=0.5):
    """Run one YOLO forward pass and return class ids of confident detections.

    Args:
        image: HxWxC numpy array. Assumed RGB (from PIL) -- TODO confirm;
            ``swapRB=True`` below reorders channels to the BGR order the
            network expects.
        net: ``cv2.dnn_Net`` returned by ``load_model()``.
        output_layers: names of the output layers to run forward to.
        conf_threshold: minimum best-class score to keep a detection.
            Default 0.5 matches the previously hard-coded value.

    Returns:
        list: class ids (duplicates possible) whose best class score
        exceeds ``conf_threshold``.
    """
    # Unpacking doubles as a cheap rank-3 sanity check on the input.
    height, width, _ = image.shape
    # Scale pixels by 1/255 (~0.00392) and resize to YOLO's 416x416 input.
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)

    detected_objects = []
    for out in outputs:
        for detection in out:
            scores = detection[5:]  # indices 0-4 are box coords + objectness
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            # Filter out weak predictions
            if confidence > conf_threshold:
                detected_objects.append(class_id)
    return detected_objects
# Mock function to convert class IDs to object names
def get_object_names(class_ids):
    """Map numeric class ids to human-readable names.

    Only the first 20 COCO classes are mapped here (sample table); ids
    without an entry are silently dropped.

    Args:
        class_ids: iterable of ints (numpy ints hash/compare equal to
            Python ints, so argmax output works directly).

    Returns:
        list[str]: names for every recognized id, in input order.
    """
    # Sample mapping (extend this according to your class IDs)
    class_names = {0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane",
                   5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light",
                   10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench",
                   14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow"}
    # Renamed loop variable: the original used ``id``, shadowing the builtin.
    return [class_names[class_id] for class_id in class_ids if class_id in class_names]
# Mock context-aware filter function
def filter_relevant_objects(detected_objects, setting):
    """Keep only objects relevant to the chosen setting.

    For "indoor" the list is restricted to a small whitelist; any other
    setting passes everything through unchanged.

    NOTE(review): the indoor whitelist ("table", "lamp") never appears in
    the 20-class table used by get_object_names, so the indoor branch
    currently always returns an empty list — confirm intended.
    """
    st.write(f"Filtering relevant objects for setting: {setting}")
    if setting != "indoor":
        return detected_objects
    indoor_whitelist = {"table", "lamp"}
    return [obj for obj in detected_objects if obj in indoor_whitelist]
# Mock summarization function
def generate_summary(relevant_objects):
    """Compose a one-sentence description of the objects in the scene."""
    st.write("Generating summary for relevant objects...")
    item_count = len(relevant_objects)
    item_listing = ', '.join(relevant_objects)
    return f"This is an {item_count}-item scene including: {item_listing}."
# Mock text-to-speech function
def text_to_speech(text):
    """Synthesize *text* to "summary.mp3" via gTTS and embed an audio player.

    Side effects: writes (and overwrites) summary.mp3 in the working
    directory; requires network access for the gTTS service.
    """
    st.write("Converting summary to speech...")
    speech = gTTS(text)
    speech.save("summary.mp3")
    st.audio("summary.mp3")
# Mock GPS navigation function
def get_distance_to_object(address):
    """Mock GPS lookup: always reports a fixed distance and travel time."""
    st.write(f"Calculating distance to address: {address}")
    distance, duration = "5 km", "15 mins"
    return distance, duration
# Streamlit app main function
def main():
    """Streamlit entry point: capture image -> detect -> filter -> summarize -> speak.

    Runs top-to-bottom on every Streamlit rerun; widget order below is the
    page layout, so statement order is behavior.
    """
    st.title("Context-Aware Object Detection App")
    # Load the YOLO model
    # NOTE(review): weights are re-read on every rerun; st.cache_resource
    # would avoid that — confirm before changing.
    net, output_layers = load_model()
    # Step 1: Capture Image from Camera
    captured_image = st.camera_input("Take a picture")
    if captured_image is not None:
        # Open the captured image
        image = Image.open(captured_image)
        image_np = np.array(image)  # Convert PIL image to numpy array
        st.image(image, caption="Captured Image", use_column_width=True)
        # Step 2: Detect Objects
        detected_ids = detect_objects(image_np, net, output_layers)
        detected_objects = get_object_names(detected_ids)
        st.write(f"Detected Objects: {detected_objects}")
        # Step 3: Filter Relevant Objects
        setting = st.selectbox("Select Setting", ["indoor", "outdoor"], index=0)
        relevant_objects = filter_relevant_objects(detected_objects, setting)
        st.write(f"Relevant Objects: {relevant_objects}")
        # Step 4: Generate Summary
        summary = generate_summary(relevant_objects)
        st.write(f"Summary: {summary}")
        # Step 5: Convert Summary to Speech
        text_to_speech(summary)
        # Step 6: GPS Navigation (simulated)
        address = st.text_input("Enter Object's Address", "1600 Amphitheatre Parkway, Mountain View, CA")
        if st.button("Get Distance to Object"):
            distance, duration = get_distance_to_object(address)
            st.write(f"Distance to Object: {distance}, Duration: {duration}")


if __name__ == "__main__":
    main()