# Reaper200's picture
# Update app.py
# 21cbf36 verified
# raw
# history blame
# 4.25 kB
import streamlit as st
from PIL import Image
import cv2
import numpy as np
from gtts import gTTS
import os
# Load pre-trained model and classes
def load_model():
    """Load the YOLOv3 network via OpenCV's DNN module.

    Returns:
        tuple: ``(net, output_layers)`` where ``net`` is the loaded
        ``cv2.dnn_Net`` and ``output_layers`` is the list of unconnected
        output-layer names needed for a forward pass.

    Raises:
        cv2.error: if "yolov3.weights" / "yolov3.cfg" are not present in
        the working directory.
    """
    net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")  # Ensure these files are in the working directory
    layer_names = net.getLayerNames()
    # Bug fix: OpenCV >= 4.5.4 returns a flat 1-D int array from
    # getUnconnectedOutLayers(), so the legacy ``i[0] - 1`` indexing raises
    # IndexError. np.ravel accepts both the old Nx1 and new 1-D shapes.
    output_layers = [layer_names[i - 1] for i in np.ravel(net.getUnconnectedOutLayers())]
    return net, output_layers
# Object detection function
def detect_objects(image, net, output_layers, conf_threshold=0.5):
    """Run one YOLO forward pass and return class ids of confident detections.

    Args:
        image: HxWxC numpy array. Assumed RGB (from PIL) -- TODO confirm;
            ``swapRB=True`` below reorders channels to the BGR order the
            network expects.
        net: ``cv2.dnn_Net`` returned by ``load_model()``.
        output_layers: names of the output layers to run forward to.
        conf_threshold: minimum best-class score to keep a detection.
            Default 0.5 matches the previously hard-coded value.

    Returns:
        list: class ids (duplicates possible) whose best class score
        exceeds ``conf_threshold``.
    """
    # Unpacking doubles as a cheap rank-3 sanity check on the input.
    height, width, _ = image.shape
    # Scale pixels by 1/255 (~0.00392) and resize to YOLO's 416x416 input.
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)

    detected_objects = []
    for out in outputs:
        for detection in out:
            scores = detection[5:]  # indices 0-4 are box coords + objectness
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            # Filter out weak predictions
            if confidence > conf_threshold:
                detected_objects.append(class_id)
    return detected_objects
# Mock function to convert class IDs to object names
def get_object_names(class_ids):
    """Map numeric class ids to human-readable names.

    Only the first 20 COCO classes are mapped here (sample table); ids
    without an entry are silently dropped.

    Args:
        class_ids: iterable of ints (numpy ints hash/compare equal to
            Python ints, so argmax output works directly).

    Returns:
        list[str]: names for every recognized id, in input order.
    """
    # Sample mapping (extend this according to your class IDs)
    class_names = {0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane",
                   5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light",
                   10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench",
                   14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow"}
    # Renamed loop variable: the original used ``id``, shadowing the builtin.
    return [class_names[class_id] for class_id in class_ids if class_id in class_names]
# Mock context-aware filter function
def filter_relevant_objects(detected_objects, setting):
    """Keep only objects relevant to the chosen setting.

    For "indoor" the list is restricted to a small whitelist; any other
    setting passes everything through unchanged.

    NOTE(review): the indoor whitelist ("table", "lamp") never appears in
    the 20-class table used by get_object_names, so the indoor branch
    currently always returns an empty list — confirm intended.
    """
    st.write(f"Filtering relevant objects for setting: {setting}")
    if setting != "indoor":
        return detected_objects
    indoor_whitelist = {"table", "lamp"}
    return [obj for obj in detected_objects if obj in indoor_whitelist]
# Mock summarization function
def generate_summary(relevant_objects):
    """Compose a one-sentence description of the objects in the scene."""
    st.write("Generating summary for relevant objects...")
    item_count = len(relevant_objects)
    item_listing = ', '.join(relevant_objects)
    return f"This is an {item_count}-item scene including: {item_listing}."
# Mock text-to-speech function
def text_to_speech(text):
    """Synthesize *text* to "summary.mp3" via gTTS and embed an audio player.

    Side effects: writes (and overwrites) summary.mp3 in the working
    directory; requires network access for the gTTS service.
    """
    st.write("Converting summary to speech...")
    speech = gTTS(text)
    speech.save("summary.mp3")
    st.audio("summary.mp3")
# Mock GPS navigation function
def get_distance_to_object(address):
    """Mock GPS lookup: always reports a fixed distance and travel time."""
    st.write(f"Calculating distance to address: {address}")
    distance, duration = "5 km", "15 mins"
    return distance, duration
# Streamlit app main function
def main():
    """Streamlit entry point: capture image -> detect -> filter -> summarize -> speak.

    Runs top-to-bottom on every Streamlit rerun; widget order below is the
    page layout, so statement order is behavior.
    """
    st.title("Context-Aware Object Detection App")
    # Load the YOLO model
    # NOTE(review): weights are re-read on every rerun; st.cache_resource
    # would avoid that — confirm before changing.
    net, output_layers = load_model()
    # Step 1: Capture Image from Camera
    captured_image = st.camera_input("Take a picture")
    if captured_image is not None:
        # Open the captured image
        image = Image.open(captured_image)
        image_np = np.array(image)  # Convert PIL image to numpy array
        st.image(image, caption="Captured Image", use_column_width=True)
        # Step 2: Detect Objects
        detected_ids = detect_objects(image_np, net, output_layers)
        detected_objects = get_object_names(detected_ids)
        st.write(f"Detected Objects: {detected_objects}")
        # Step 3: Filter Relevant Objects
        setting = st.selectbox("Select Setting", ["indoor", "outdoor"], index=0)
        relevant_objects = filter_relevant_objects(detected_objects, setting)
        st.write(f"Relevant Objects: {relevant_objects}")
        # Step 4: Generate Summary
        summary = generate_summary(relevant_objects)
        st.write(f"Summary: {summary}")
        # Step 5: Convert Summary to Speech
        text_to_speech(summary)
        # Step 6: GPS Navigation (simulated)
        address = st.text_input("Enter Object's Address", "1600 Amphitheatre Parkway, Mountain View, CA")
        if st.button("Get Distance to Object"):
            distance, duration = get_distance_to_object(address)
            st.write(f"Distance to Object: {distance}, Duration: {duration}")


if __name__ == "__main__":
    main()