# Hugging Face "Spaces: Sleeping" status banner (page-scrape artifact, not code).
import os
import tempfile

import cv2
import streamlit as st
from gtts import gTTS
from ultralytics import YOLO
@st.cache_resource
def load_model():
    """Load and cache the pre-trained YOLOv8 nano model.

    Streamlit re-executes the whole script on every user interaction;
    without caching the model would be re-loaded (and possibly
    re-downloaded) on each rerun. ``st.cache_resource`` keeps a single
    instance alive for the session.

    Returns:
        YOLO: the loaded detection model.
    """
    return YOLO('yolov8n.pt')  # Automatically downloads the weights on first use


model = load_model()
# ----- Page layout ------------------------------------------------------
st.title("Object Detection in Video")
st.write("Upload a video, and the application will detect and label objects frame by frame, and generate a summary.")

# Let the user pick a video file to analyse.
uploaded_video = st.file_uploader("Upload a Video", type=["mp4", "avi", "mov"])
if uploaded_video:
    # Persist the upload to disk so OpenCV can open it by path.
    # Close the handle before cv2 opens it: on Windows a file still held
    # open by NamedTemporaryFile cannot be opened a second time.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    temp_file.write(uploaded_video.read())
    temp_file.close()
    video_path = temp_file.name

    # Open the video and read its basic properties.
    video = cv2.VideoCapture(video_path)
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Some containers report FPS as 0; fall back so VideoWriter stays valid.
    fps = int(video.get(cv2.CAP_PROP_FPS)) or 30
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

    # Output file for the annotated video (closed so VideoWriter owns it).
    output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
    output_file.close()
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_file.name, fourcc, fps, (frame_width, frame_height))

    # Unique class names seen across all frames.
    detected_objects = set()

    st.write("Processing video...")
    progress_bar = st.progress(0)

    try:
        frame_index = 0
        # Loop on read() instead of trusting CAP_PROP_FRAME_COUNT, which is
        # an estimate and can be 0 or wrong for some codecs.
        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Object detection on the current frame.
            results = model(frame)

            # Record every class detected in this frame.
            detected_objects.update(
                model.names[int(box.cls)] for box in results[0].boxes
            )

            # Write the frame with bounding boxes drawn.
            out.write(results[0].plot())

            frame_index += 1
            if total_frames > 0:  # guard: frame count may be 0/unknown
                progress_bar.progress(min(frame_index / total_frames, 1.0))
    finally:
        # Always release handles, even if detection raises mid-video.
        video.release()
        out.release()

    progress_bar.progress(1.0)

    # Build a human-readable summary (sorted for deterministic output).
    if detected_objects:
        detected_objects_list = ", ".join(sorted(detected_objects))
        summary_text = f"In this video, the following objects were detected: {detected_objects_list}."
    else:
        summary_text = "No objects were detected in the video."

    st.write("Summary:")
    st.write(summary_text)

    # Generate a spoken version of the summary using gTTS.
    tts = gTTS(text=summary_text, lang='en')
    audio_file = os.path.join(tempfile.gettempdir(), "summary.mp3")
    tts.save(audio_file)

    # Display the output video.
    st.write("Video processing complete! Download or watch the labeled video below:")
    st.video(output_file.name)

    # Read via context managers so the file handles are closed (the original
    # open(...).read() leaked them).
    with open(output_file.name, "rb") as fh:
        labeled_video_bytes = fh.read()
    st.download_button(
        label="Download Labeled Video",
        data=labeled_video_bytes,
        file_name="labeled_video.mp4",
        mime="video/mp4",
    )

    # Provide audio playback and download.
    st.audio(audio_file, format="audio/mp3")
    with open(audio_file, "rb") as fh:
        audio_bytes = fh.read()
    st.download_button(
        label="Download Audio Summary",
        data=audio_bytes,
        file_name="summary.mp3",
        mime="audio/mp3",
    )