Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from PIL import Image | |
| import cv2 | |
| import numpy as np | |
| from gtts import gTTS | |
| import os | |
# Load pre-trained model and classes
def load_model():
    """Load the YOLOv3 network with OpenCV's DNN module.

    Returns:
        tuple: (net, output_layers) where ``net`` is the cv2.dnn network and
        ``output_layers`` is the list of output layer names to forward to.

    Requires ``yolov3.weights`` and ``yolov3.cfg`` in the working directory.
    """
    net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")  # Ensure these files are in the working directory
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() returns 1-element arrays in OpenCV < 4.5.4
    # but plain scalars in newer versions; indexing with i[0] crashes on the
    # latter ("invalid index to scalar variable"). flatten() handles both.
    out_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    output_layers = [layer_names[i - 1] for i in out_layer_ids]
    return net, output_layers
# Object detection function
def detect_objects(image, net, output_layers):
    """Forward ``image`` through the YOLO net and collect confident class ids.

    Args:
        image: HxWx3 numpy array (BGR/RGB as prepared by the caller).
        net: cv2.dnn network from load_model().
        output_layers: output layer names from load_model().

    Returns:
        list: class ids whose best score exceeds the 0.5 threshold.
    """
    height, width, _ = image.shape
    # Normalize to [0,1], resize to 416x416, swap R/B channels for YOLO.
    blob = cv2.dnn.blobFromImage(image, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)
    detected = []
    for layer_output in outputs:
        for row in layer_output:
            class_scores = row[5:]  # per-class scores follow the 4 box coords + objectness
            best_class = np.argmax(class_scores)
            # Keep only confident predictions (threshold tunable).
            if class_scores[best_class] > 0.5:
                detected.append(best_class)
    return detected
# Mock function to convert class IDs to object names
def get_object_names(class_ids):
    """Map numeric COCO-style class ids to human-readable labels.

    Ids without an entry in the (partial) mapping are silently dropped.
    """
    class_names = {0: "person", 1: "bicycle", 2: "car", 3: "motorcycle", 4: "airplane",
                   5: "bus", 6: "train", 7: "truck", 8: "boat", 9: "traffic light",
                   10: "fire hydrant", 11: "stop sign", 12: "parking meter", 13: "bench",
                   14: "bird", 15: "cat", 16: "dog", 17: "horse", 18: "sheep", 19: "cow"}
    labels = []
    for class_id in class_ids:
        label = class_names.get(class_id)
        if label is not None:
            labels.append(label)
    return labels
# Mock context-aware filter function
def filter_relevant_objects(detected_objects, setting):
    """Keep only the objects relevant to the chosen setting.

    For "indoor", restrict to a small whitelist; any other setting passes
    the list through unchanged. (Mock implementation.)
    """
    st.write(f"Filtering relevant objects for setting: {setting}")
    if setting != "indoor":
        return detected_objects
    indoor_whitelist = ("table", "lamp")
    return [name for name in detected_objects if name in indoor_whitelist]
# Mock summarization function
def generate_summary(relevant_objects):
    """Build a one-sentence textual summary of the detected scene.

    Args:
        relevant_objects: list of object-name strings (may be empty).

    Returns:
        str: a human-readable sentence; a dedicated message when nothing
        was detected so the text-to-speech step never reads out an
        empty "0-item scene including: ." sentence.
    """
    st.write("Generating summary for relevant objects...")
    if not relevant_objects:
        return "No relevant objects were detected in the scene."
    # "a {n}-item" instead of the original "an {n}-item", which was
    # ungrammatical for most counts.
    summary = f"This is a {len(relevant_objects)}-item scene including: {', '.join(relevant_objects)}."
    return summary
# Mock text-to-speech function
def text_to_speech(text):
    """Synthesize ``text`` with gTTS, save it to summary.mp3, and play it
    through a Streamlit audio widget."""
    st.write("Converting summary to speech...")
    audio_path = "summary.mp3"
    speech = gTTS(text)
    speech.save(audio_path)
    st.audio(audio_path)
# Mock GPS navigation function
def get_distance_to_object(address):
    """Simulated GPS lookup: always reports a fixed distance/duration pair.

    Returns:
        tuple[str, str]: (distance, duration) placeholder values.
    """
    st.write(f"Calculating distance to address: {address}")
    distance, duration = "5 km", "15 mins"
    return distance, duration
# Streamlit app main function
def main():
    """Drive the Streamlit UI: capture a photo, run YOLO detection,
    filter by setting, summarize, speak the summary, and show a
    simulated distance lookup. Streamlit re-runs this top-to-bottom on
    every widget interaction, so statement order defines the page layout.
    """
    st.title("Context-Aware Object Detection App")
    # Load the YOLO model
    # NOTE(review): this reloads weights on every rerun; st.cache_resource
    # would avoid that — confirm before changing.
    net, output_layers = load_model()
    # Step 1: Capture Image from Camera
    captured_image = st.camera_input("Take a picture")
    if captured_image is not None:
        # Open the captured image
        image = Image.open(captured_image)
        image_np = np.array(image)  # Convert PIL image to numpy array
        # NOTE(review): use_column_width is deprecated in newer Streamlit
        # in favor of use_container_width — verify installed version.
        st.image(image, caption="Captured Image", use_column_width=True)
        # Step 2: Detect Objects
        detected_ids = detect_objects(image_np, net, output_layers)
        detected_objects = get_object_names(detected_ids)
        st.write(f"Detected Objects: {detected_objects}")
        # Step 3: Filter Relevant Objects
        setting = st.selectbox("Select Setting", ["indoor", "outdoor"], index=0)
        relevant_objects = filter_relevant_objects(detected_objects, setting)
        st.write(f"Relevant Objects: {relevant_objects}")
        # Step 4: Generate Summary
        summary = generate_summary(relevant_objects)
        st.write(f"Summary: {summary}")
        # Step 5: Convert Summary to Speech
        text_to_speech(summary)
        # Step 6: GPS Navigation (simulated)
        address = st.text_input("Enter Object's Address", "1600 Amphitheatre Parkway, Mountain View, CA")
        if st.button("Get Distance to Object"):
            distance, duration = get_distance_to_object(address)
            st.write(f"Distance to Object: {distance}, Duration: {duration}")

# Script entry point (run via `streamlit run <file>`).
if __name__ == "__main__":
    main()