Spaces:

Agents-MCP-Hackathon
/

video_mcp

Sleeping

App Files Files Community

video_mcp / app.py

jomasego

Initial commit: Add project files and README

0cff18c 6 months ago

raw

history blame

12.3 kB

	import gradio as gr
	import os
	import requests
	import tempfile
	import subprocess
	import re
	import shutil # Added for rmtree
	import modal

	def is_youtube_url(url_string: str) -> bool:
	"""Checks if the given string is a YouTube URL."""
	# More robust regex to find YouTube video ID, accommodating various URL formats
	# and additional query parameters.
	youtube_regex = (
	r'(?:youtube(?:-nocookie)?\.com/(?:[^/\n\s]+/\|watch(?:/\|\?(?:[^&\n\s]+&)v=)\|embed(?:/\|\?(?:[^&\n\s]+&)feature=oembed)\|shorts/\|live/)\|youtu\.be/)'
	r'([a-zA-Z0-9_-]{11})' # This captures the 11-character video ID
	)
	# We use re.search because the video ID might not be at the start of the query string part of the URL.
	# re.match only matches at the beginning of the string (or beginning of line in multiline mode).
	# The regex now directly looks for the 'v=VIDEO_ID' or youtu.be/VIDEO_ID structure.
	# The first part of the regex matches the domain and common paths, the second part captures the ID.
	return bool(re.search(youtube_regex, url_string))

	def download_video(url_string: str, temp_dir: str) -> str \| None:
	"""Downloads video from a URL (YouTube or direct link) to a temporary directory."""
	if is_youtube_url(url_string):
	print(f"Attempting to download YouTube video: {url_string}")
	# Define a fixed output filename pattern within the temp_dir
	output_filename_template = "downloaded_video.%(ext)s" # yt-dlp replaces %(ext)s
	output_path_template = os.path.join(temp_dir, output_filename_template)

	cmd = [
	"yt-dlp",
	"-f", "bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4/best", # Prefer mp4 format
	"--output", output_path_template,
	url_string
	]
	print(f"Executing yt-dlp command: {' '.join(cmd)}")

	try:
	result = subprocess.run(cmd, capture_output=True, text=True, timeout=300, check=False)

	print(f"yt-dlp STDOUT:\n{result.stdout}")
	print(f"yt-dlp STDERR:\n{result.stderr}")

	if result.returncode == 0:
	# Find the actual downloaded file based on the template
	downloaded_file_path = None
	for item in os.listdir(temp_dir):
	if item.startswith("downloaded_video."):
	potential_path = os.path.join(temp_dir, item)
	if os.path.isfile(potential_path):
	downloaded_file_path = potential_path
	print(f"YouTube video successfully downloaded to: {downloaded_file_path}")
	break
	if downloaded_file_path:
	return downloaded_file_path
	else:
	print(f"yt-dlp seemed to succeed (exit code 0) but the output file 'downloaded_video.*' was not found in {temp_dir}.")
	return None
	else:
	print(f"yt-dlp failed with return code {result.returncode}.")
	return None
	except subprocess.TimeoutExpired:
	print(f"yt-dlp command timed out after 300 seconds for URL: {url_string}")
	return None
	except Exception as e:
	print(f"An unexpected error occurred during yt-dlp execution for {url_string}: {e}")
	return None

	elif url_string.startswith(('http://', 'https://')) and url_string.lower().endswith(('.mp4', '.mov', '.avi', '.mkv', '.webm')):
	print(f"Attempting to download direct video link: {url_string}")
	try:
	response = requests.get(url_string, stream=True, timeout=300) # 5 min timeout
	response.raise_for_status() # Raises HTTPError for bad responses (4XX or 5XX)

	filename = os.path.basename(url_string) or "downloaded_video_direct.mp4"
	video_file_path = os.path.join(temp_dir, filename)

	with open(video_file_path, 'wb') as f:
	for chunk in response.iter_content(chunk_size=8192):
	f.write(chunk)
	print(f"Direct video downloaded successfully to: {video_file_path}")
	return video_file_path
	except requests.exceptions.RequestException as e:
	print(f"Error downloading direct video link {url_string}: {e}")
	return None
	except Exception as e:
	print(f"An unexpected error occurred during direct video download for {url_string}: {e}")
	return None
	else:
	print(f"Input '{url_string}' is not a recognized YouTube URL or direct video link for download.")
	return None


	def process_video_input(input_string: str) -> str:
	"""
	Processes the video (from URL or local file path) and returns its transcription status.
	"""
	if not input_string:
	return "Error: No video URL or file path provided."

	video_path_to_process = None
	created_temp_dir = None # To store path of temp directory if created for download

	try:
	if input_string.startswith(('http://', 'https://')):
	print(f"Input is a URL: {input_string}")
	created_temp_dir = tempfile.mkdtemp()
	print(f"Created temporary directory for download: {created_temp_dir}")
	downloaded_path = download_video(input_string, created_temp_dir)

	if downloaded_path and os.path.exists(downloaded_path):
	video_path_to_process = downloaded_path
	else:
	# Error message is already printed by download_video or this block
	print(f"Failed to download or locate video from URL: {input_string}")
	# Cleanup is handled in finally, so just return error
	return "Error: Failed to download video from URL."

	elif os.path.exists(input_string):
	print(f"Input is a local file path: {input_string}")
	video_path_to_process = input_string
	else:
	return f"Error: Input '{input_string}' is not a valid URL or an existing file path."

	if video_path_to_process:
	print(f"Processing video: {video_path_to_process}")
	print(f"Video path to process: {video_path_to_process}")
	try:
	print("Reading video file into bytes...")
	with open(video_path_to_process, "rb") as video_file:
	video_bytes_content = video_file.read()
	print(f"Read {len(video_bytes_content)} bytes from video file.")

	# Ensure MODAL_TOKEN_ID and MODAL_TOKEN_SECRET are set as environment variables
	# in your Hugging Face Space. For local `python app.py` runs, Modal CLI's
	# authenticated state is usually used.
	# os.environ["MODAL_TOKEN_ID"] = "your_modal_token_id" # Replace or set in HF Space
	# os.environ["MODAL_TOKEN_SECRET"] = "your_modal_token_secret" # Replace or set in HF Space

	print("Looking up Modal function 'whisper-transcriber/transcribe_video_audio'...")
	# The function name should match what was deployed.
	# It's typically 'AppName/FunctionName' or just 'FunctionName' if app is default.
	# Based on your deployment log, app name is 'whisper-transcriber'
	# and function is 'transcribe_video_audio'
	try:
	f = modal.Function.from_name("whisper-transcriber", "transcribe_video_audio")
	print("Modal function looked up successfully.")
	except modal.Error as e:
	print("Modal function 'whisper-transcriber/transcribe_video_audio' not found. Trying with just function name.")
	# Fallback or alternative lookup, though the above should be correct for named apps.
	# This might be needed if the app name context is implicit.
	# For a named app 'whisper-transcriber' and function 'transcribe_video_audio',
	# the lookup `modal.Function.lookup("whisper-transcriber", "transcribe_video_audio")` is standard.
	# If it was deployed as part of the default app, then just "transcribe_video_audio" might work.
	# Given the deployment log, the first lookup should be correct.
	return "Error: Could not find the deployed Modal function. Please check deployment status and name."

	print("Calling Modal function for transcription...")
	# Using .remote() for asynchronous execution, .call() for synchronous
	# For Gradio, synchronous (.call()) might be simpler to handle the response directly.
	transcription = f.remote(video_bytes_content) # Use .remote() for Modal function call
	print(f"Received transcription from Modal: {transcription[:100]}...")
	return transcription
	except FileNotFoundError:
	print(f"Error: Video file not found at {video_path_to_process} before sending to Modal.")
	return f"Error: Video file disappeared before processing."
	except modal.Error as e: # Using modal.Error as the base Modal exception
	print(f"Modal specific error: {e}")
	return f"Error during Modal operation: {str(e)}"
	except Exception as e:
	print(f"An unexpected error occurred while calling Modal: {e}")
	import traceback
	traceback.print_exc()
	return f"Error: Failed to get transcription. {str(e)}"
	else:
	# This case should ideally be caught by earlier checks
	return "Error: No video available to process after input handling."

	finally:
	if created_temp_dir and os.path.exists(created_temp_dir):
	print(f"Cleaning up temporary directory: {created_temp_dir}")
	try:
	shutil.rmtree(created_temp_dir)
	print(f"Successfully removed temporary directory: {created_temp_dir}")
	except Exception as e:
	print(f"Error removing temporary directory {created_temp_dir}: {e}")

	# Gradio Interface for the API endpoint
	api_interface = gr.Interface(
	fn=process_video_input,
	inputs=gr.Textbox(label="Video URL or Local File Path for Transcription",
	placeholder="Enter YouTube URL, direct video URL (.mp4, .mov, etc.), or local file path..."),
	outputs="text",
	title="Video Transcription API",
	description="Provide a video URL or local file path to get its audio transcription status.",
	allow_flagging="never"
	)

	# Gradio Interface for a simple user-facing demo
	def demo_process_video(input_string: str) -> str:
	"""
	A simple demo function for the Gradio UI.
	It calls the same backend logic as the API.
	"""
	print(f"Demo received input: {input_string}")
	result = process_video_input(input_string) # Call the core logic
	return result

	demo_interface = gr.Interface(
	fn=demo_process_video,
	inputs=gr.Textbox(label="Upload Video URL or Local File Path for Demo",
	placeholder="Enter YouTube URL, direct video URL (.mp4, .mov, etc.), or local file path..."),
	outputs="text",
	title="Video Transcription Demo",
	description="Provide a video URL or local file path to see its transcription status.",
	allow_flagging="never"
	)

	# Combine interfaces into a Blocks app
	with gr.Blocks() as app:
	gr.Markdown("# Contextual Video Data Server")
	gr.Markdown("This Hugging Face Space acts as a backend for processing video context for AI models.")

	with gr.Tab("API Endpoint (for AI Models)"):
	gr.Markdown("### Use this endpoint from another application (e.g., another Hugging Face Space).")
	gr.Markdown("The `process_video_input` function is exposed here.")
	api_interface.render()

	with gr.Tab("Demo (for Manual Testing)"):
	gr.Markdown("### Manually test video URLs or paths and observe the response.")
	demo_interface.render()

	# Launch the Gradio application
	if __name__ == "__main__":
	app.launch(server_name="0.0.0.0")