Final_Assignment_Template

Sleeping

Final_Assignment_Template / tools /transcribe.py

José Enrique

updated evaluation files

9ccff9e 6 months ago

5 kB


	import os
	from langchain.document_loaders.generic import GenericLoader
	from langchain.document_loaders.parsers import OpenAIWhisperParser
	from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader


	from langchain.schema import Document
	from smolagents import tool
	from yt_dlp import YoutubeDL
	from PIL import Image
	import cv2
	import numpy as np
	from smolagents.agents import ActionStep
	from smolagents import CodeAgent

	def get_video_frames(video_path:str,task_id:str)->list[dict]:
	    """Sample up to seven evenly spaced frames from a video and save them as PNGs.

	    Frame indices are spread evenly between the first and the last frame of
	    the video. Every frame that can be read is converted from OpenCV's BGR
	    channel order to RGB and written to
	    ``attachments/{task_id}_frame_{timestamp}.png``.

	    Args:
	        video_path (str): Path of the video file to read.
	        task_id (str): Prefix used in the names of the saved frame images.

	    Returns:
	        list[dict]: One ``{"image": <png path>, "timestamp": <value>}`` entry
	        per saved frame, in playback order. ``timestamp`` is the frame index
	        divided by the reported FPS, rounded to two decimals; when no usable
	        FPS is reported the raw frame index is used instead. The list is
	        empty when the capture reports no frames (e.g. the file is missing
	        or unreadable).
	    """
	    vidcap = cv2.VideoCapture(video_path)
	    frames = []
	    try:
	        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
	        fps = vidcap.get(cv2.CAP_PROP_FPS)

	        if total_frames > 0:
	            # np.unique drops the repeated indices linspace yields for clips
	            # shorter than seven frames, so no frame is saved twice.
	            frame_indices = np.unique(
	                np.linspace(0, total_frames - 1, 7, dtype=int)
	            )
	            for i in frame_indices:
	                vidcap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
	                success, image = vidcap.read()
	                if not success:
	                    continue
	                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert from BGR to RGB
	                pil_image = Image.fromarray(image)
	                # Plain Python floats keep the printed observations readable;
	                # guard against a zero FPS, which would give inf/nan values.
	                timestamp = round(float(i) / fps, 2) if fps and fps > 0 else float(i)
	                frames.append((pil_image, timestamp))
	    finally:
	        # Release the capture even if decoding raised.
	        vidcap.release()

	    observations = []
	    if frames:
	        # Image.save does not create missing directories.
	        os.makedirs("attachments", exist_ok=True)
	    for pil_image, timestamp in frames:
	        # Save under exactly the path that is reported back to the caller.
	        image_path = f"attachments/{task_id}_frame_{timestamp}.png"
	        pil_image.save(image_path, format='PNG')
	        observations.append({
	            "image": image_path,
	            "timestamp": timestamp
	        })

	    return observations


	@tool
	def parse_youtube_video(url:str,task_id:str,save_dir:str="attachments")->Document:
	    """Parse a YouTube video and return its transcript.

	    If a cached transcript file named after the task id already exists in the
	    save directory it is loaded from disk. Otherwise the audio of the video is
	    downloaded into the save directory and transcribed with OpenAI Whisper.

	    Args:
	        url (str): The URL of the YouTube video.
	        task_id (str): The task ID used to look up a cached transcript file.
	        save_dir (str): The directory holding the cached transcript and the downloaded audio. Defaults to "attachments".
	    Returns:
	        Document: The transcript as returned by the loader's load() call.
	    """
	    # A missing directory used to leave `document` unbound (or reach the
	    # audio loader as None); fall back to the documented default instead.
	    if save_dir is None:
	        save_dir = "attachments"

	    transcript_path = os.path.join(save_dir, task_id + ".json")
	    if os.path.isfile(transcript_path):
	        print("trying to load transcript from file...")
	        document = GenericLoader.from_filesystem(transcript_path).load()
	    else:
	        # The audio loader is only needed when there is no cached transcript.
	        loader = YoutubeAudioLoader([url], save_dir)
	        parser = OpenAIWhisperParser()
	        document = GenericLoader(loader, parser).load()
	    return document

	@tool
	def download_youtube_video(url:str,task_id:str,save_dir:str="attachments")->list[str]:
	    """Download a YouTube video and return a sample of its frames.

	    The video-only stream (mp4 at 480p or lower when available) is downloaded
	    into the save directory under the task id, then evenly spaced frames are
	    extracted from it and saved as PNG images.

	    Args:
	        url (str): The URL of the YouTube video.
	        task_id (str): The task ID used to name the downloaded video and the extracted frames.
	        save_dir (str): The directory to save the downloaded video. Defaults to "attachments".
	    Returns:
	        list[str]: One entry per extracted frame holding the saved image path and its timestamp. Empty when the download fails.
	    """
	    print(f"Downloading video from YouTube: {url}")
	    ydl_opts = {
	        'format': 'bestvideo[ext=mp4][height<=480]/bestvideo[ext=mp4]/bestvideo',
	        'outtmpl': os.path.join(save_dir, task_id+'.%(ext)s'),
	        'progress_hooks': [lambda d: print(d['status'])],
	        'ignoreerrors': True,
	        'no_warnings': False,
	        # NOTE(review): 'log_verbosity' does not look like a yt-dlp option
	        # ('quiet': True is the usual switch) -- confirm before relying on it.
	        'log_verbosity': 'quiet',
	    }
	    try:
	        with YoutubeDL(ydl_opts) as ydl:
	            info_dict = ydl.extract_info(url, download=True)
	            if info_dict is None:
	                # With 'ignoreerrors' a failed download is reported by a
	                # None result instead of an exception.
	                print(f"An error occurred: no video could be downloaded from {url}")
	                return []
	            # Ask yt-dlp for the real output name: the last format fallback
	            # ('bestvideo') is not guaranteed to produce an .mp4 file.
	            downloaded_path = ydl.prepare_filename(info_dict)
	    except Exception as e:
	        # Best effort: the tool reports the failure and yields no frames
	        # rather than aborting the calling agent.
	        print(f"An error occurred: {e}")
	        return []

	    video_title = info_dict.get('title', 'video')
	    print(f"Successfully downloaded '{video_title}' as low-quality MP4 (video-only) to '{save_dir}'")

	    # Fall back to the historical '<task_id>.mp4' name if the reported file
	    # is not on disk.
	    video_path = downloaded_path
	    if not os.path.isfile(video_path):
	        video_path = os.path.join(save_dir, task_id+'.mp4')

	    # get the video frames:
	    return get_video_frames(video_path, task_id)

	def load_images(step_log: ActionStep, agent: CodeAgent) -> None:
	    """Print the model output recorded for an agent step.

	    The signature takes the step log and the agent; the agent is currently
	    unused. At the moment the function only echoes ``step_log.model_output``
	    to stdout.

	    Args:
	        step_log (ActionStep): The step whose model output is printed.
	        agent (CodeAgent): The agent that produced the step (unused).
	    """
	    # TODO: attach the extracted frame images to step_log.observations_images
	    # and clear them from older steps to keep the agent's logs lean.
	    print(step_log.model_output)



	if __name__ == "__main__":
	    # Manual smoke test: download one sample video and extract its frames.
	    save_dir = "attachments"
	    task_id = "test_task"
	    # Alternative sample video: https://www.youtube.com/watch?v=L1vXCYZAYYM
	    url = "https://www.youtube.com/watch?v=1htKBjuUWec"
	    download_youtube_video(url, task_id, save_dir)