Spaces:

userIdc2024
/

AdGenesis-App

Running

AdGenesis-App / generator_function /video_text_generator.py

Upload 2 files

638b572 verified 3 months ago

1.84 kB

	import os, uuid, subprocess, boto3, replicate
	from typing import Dict, Any
	from dotenv import load_dotenv
	# from helpers_function.helpers import upload_to_r2

	# Load environment variables via python-dotenv before the os.getenv lookup below.
	load_dotenv()

	# Module-level Replicate client used by the functions in this file.
	# The API token is read from the REPLICATE_API_KEY environment variable.
	# NOTE(review): os.getenv returns None when the variable is unset - confirm
	# how replicate.Client behaves with api_token=None.
	replicate_client = replicate.Client(api_token=os.getenv("REPLICATE_API_KEY"))

	def video_to_audio(video_path: str) -> str:
	    """Extract the audio track of a video into a new MP3 file using ffmpeg.

	    The output file gets a random UUID-based name and is written relative to
	    the current working directory. The caller owns the returned file and is
	    responsible for deleting it.

	    Args:
	        video_path: Path of the input video, passed to ffmpeg's ``-i`` option.

	    Returns:
	        The name of the MP3 file that was written.

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
	        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
	    """
	    audio_filename = f"{uuid.uuid4()}.mp3"
	    command = [
	        "ffmpeg", "-hide_banner", "-loglevel", "error",
	        "-i", video_path, "-vn", "-acodec", "libmp3lame", "-y", audio_filename,
	    ]
	    try:
	        # Detach stdin so ffmpeg never reads (or blocks on) the parent
	        # process's standard input when run from a server process.
	        subprocess.run(command, check=True, stdin=subprocess.DEVNULL)
	    except BaseException:
	        # ffmpeg may have created a partial output before failing or being
	        # interrupted; remove it so failed conversions do not pile up on disk.
	        try:
	            os.remove(audio_filename)
	        except OSError:
	            # Nothing was written (or it is already gone); keep the original error.
	            pass
	        raise
	    return audio_filename

	def extract_text_from_video(video_path: str, max_duration: int = 60) -> Dict[str, Any]:
	    """Run the Replicate seamless_communication model on a video's audio track.

	    Steps:
	        1. Extract the audio from the video into a temporary MP3 file.
	        2. Pass the open audio file to the pinned Replicate model version.
	        3. Delete the temporary MP3, whether or not the model call succeeded.

	    Args:
	        video_path: Path of the input video.
	        max_duration: Forwarded to the model as ``max_input_audio_length``.

	    Returns:
	        The value returned by the Replicate call, or ``{"error": "No output"}``
	        when that value is falsy.

	    Raises:
	        Exceptions raised by ``video_to_audio`` or by the Replicate client
	        call are not caught here and propagate to the caller.
	    """
	    audio_file = video_to_audio(video_path)

	    # The audio file handle is handed to the client directly; the earlier
	    # upload-to-R2 step is no longer part of this function.
	    try:
	        with open(audio_file, "rb") as f:
	            # NOTE(review): the task is speech-to-speech (S2ST) although this
	            # function's name suggests text extraction - confirm this is the
	            # intended task before changing it, since it affects the output.
	            result = replicate_client.run(
	                "cjwbw/seamless_communication:668a4fec05a887143e5fe8d45df25ec4c794dd43169b9a11562309b2d45873b0",
	                input={
	                    "task_name": "S2ST (Speech to Speech translation)",
	                    "input_audio": f,
	                    "input_text_language": "None",
	                    "max_input_audio_length": max_duration,
	                    "target_language_text_only": "English",
	                    "target_language_with_speech": "English",
	                },
	            )
	    finally:
	        # Best-effort cleanup: a failure to delete the temporary audio must not
	        # mask the model result or the exception already in flight.
	        try:
	            os.remove(audio_file)
	        except OSError:
	            pass

	    return result if result else {"error": "No output"}