Spaces:

doggdad
/

mmrag-hf

Sleeping

App Files Files Community

mmrag-hf / src /preprocess /preprocessing.py

doggdad

Upload 18 files

21cbf97 verified 5 months ago

raw

history blame contribute delete

2.2 kB

	from os import path as osp
	import json

	import cv2
	import webvtt

	from utils import maintain_aspect_ratio_resize, str2time

	def extract_and_save_frames_and_metadata(
	        path_to_video,
	        path_to_transcript,
	        path_to_save_extracted_frames,
	        path_to_save_metadatas):
	    """Extract one frame per transcript segment and save frames + metadata.

	    For every caption in the WebVTT transcript, the video is seeked to the
	    midpoint between the caption's start and end, one frame is read,
	    resized with ``maintain_aspect_ratio_resize(frame, height=350)`` and
	    written as ``frame_<idx>.jpg`` into ``path_to_save_extracted_frames``.
	    The metadata of all saved frames is dumped to ``metadatas.json`` inside
	    ``path_to_save_metadatas``.

	    Args:
	        path_to_video: Path of the video file opened with
	            ``cv2.VideoCapture``.
	        path_to_transcript: Path of the WebVTT transcript read with
	            ``webvtt.read``.
	        path_to_save_extracted_frames: Directory that receives the JPEG
	            frames. It is not created here and must already exist.
	        path_to_save_metadatas: Directory that receives
	            ``metadatas.json``. It is not created here and must already
	            exist.

	    Returns:
	        A list with one dict per successfully saved frame, with the keys
	        ``extracted_frame_path``, ``transcript``, ``video_segment_id``,
	        ``video_path`` and ``mid_time_ms``. Segments whose frame could not
	        be read or written are reported on stdout and skipped.
	    """
	    # metadatas will store the metadata of all extracted frames
	    metadatas = []

	    # parse the transcript first so a malformed transcript fails before
	    # a video handle has been opened
	    trans = webvtt.read(path_to_transcript)
	    # load video using cv2
	    video = cv2.VideoCapture(path_to_video)

	    try:
	        # for each video segment specified in the transcript file
	        for idx, transcript in enumerate(trans):
	            # NOTE(review): str2time is assumed to return milliseconds
	            # (the values are passed to CAP_PROP_POS_MSEC) - confirm
	            # against utils.str2time.
	            start_time_ms = str2time(transcript.start)
	            end_time_ms = str2time(transcript.end)
	            # timestamp exactly in the middle of the segment
	            mid_time_ms = (end_time_ms + start_time_ms) / 2
	            # get the transcript, remove the next-line symbol
	            text = transcript.text.replace("\n", ' ')

	            # get frame at the middle time
	            video.set(cv2.CAP_PROP_POS_MSEC, mid_time_ms)
	            success, frame = video.read()
	            if not success:
	                print(f"ERROR! Cannot extract frame: idx = {idx}")
	                continue

	            # the frame was extracted successfully, resize it
	            image = maintain_aspect_ratio_resize(frame, height=350)
	            # save frame as JPEG file
	            img_fname = f'frame_{idx}.jpg'
	            img_fpath = osp.join(path_to_save_extracted_frames, img_fname)
	            # cv2.imwrite reports failure through its return value
	            # instead of raising, so check it: metadata must never
	            # reference a JPEG that was not actually written.
	            if not cv2.imwrite(img_fpath, image):
	                print(f"ERROR! Cannot save frame: idx = {idx}")
	                continue

	            # prepare the metadata
	            metadatas.append({
	                'extracted_frame_path': img_fpath,
	                'transcript': text,
	                'video_segment_id': idx,
	                'video_path': path_to_video,
	                'mid_time_ms': mid_time_ms,
	            })
	    finally:
	        # always free the capture handle, even if a segment raised
	        video.release()

	    # save metadata of all extracted frames
	    fn = osp.join(path_to_save_metadatas, 'metadatas.json')
	    with open(fn, 'w', encoding='utf-8') as outfile:
	        json.dump(metadatas, outfile)
	    return metadatas