AdGenesis-App / generator_function /video_text_generator.py
userIdc2024's picture
Upload 2 files
638b572 verified
import os, uuid, subprocess, boto3, replicate
from typing import Dict, Any
from dotenv import load_dotenv
# from helpers_function.helpers import upload_to_r2
# Load variables from a local .env file (if present) into the process
# environment before any of them are read below.
load_dotenv()
# Module-level Replicate client shared by the functions in this file. The API
# token is read from the REPLICATE_API_KEY environment variable; os.getenv
# returns None when it is unset.
replicate_client = replicate.Client(api_token=os.getenv("REPLICATE_API_KEY"))
def video_to_audio(video_path: str) -> str:
    """Extract the audio track of a video into a new MP3 file using ffmpeg.

    The output is written to the current working directory under a random
    UUID-based name. The caller owns the returned file and is responsible
    for deleting it once it is no longer needed.

    Args:
        video_path: Path of the input video, passed to ffmpeg's ``-i`` option.

    Returns:
        The name of the generated ``.mp3`` file, relative to the current
        working directory.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    audio_filename = f"{uuid.uuid4()}.mp3"
    command = [
        "ffmpeg", "-hide_banner", "-loglevel", "error",
        "-i", video_path,
        "-vn",  # drop the video stream, keep audio only
        "-acodec", "libmp3lame",
        "-y",  # overwrite the output without prompting
        audio_filename,
    ]
    try:
        subprocess.run(command, check=True)
    except BaseException:
        # ffmpeg may already have created a partial output file; the caller
        # never learns its name on failure, so remove it here before
        # re-raising the original exception unchanged.
        try:
            os.remove(audio_filename)
        except OSError:
            pass
        raise
    return audio_filename
def extract_text_from_video(video_path: str, max_duration: int = 60) -> Dict[str, Any]:
    """Run the Replicate ``seamless_communication`` model on a video's audio.

    Steps:
        1. Extract the audio track to an MP3 file via ``video_to_audio``.
        2. Send the opened audio file to the Replicate model.
        3. Delete the MP3 file, whether or not the model call succeeded.

    Args:
        video_path: Path of the video whose speech should be processed.
        max_duration: Forwarded to the model as ``max_input_audio_length``
            (presumably seconds -- confirm against the model's schema).

    Returns:
        The model output exactly as returned by the Replicate client, or
        ``{"error": "No output"}`` when that output is empty or falsy.

    Raises:
        Exceptions from ``video_to_audio`` or from the Replicate client
        propagate unchanged; the extracted audio file is still removed.
    """
    audio_file = video_to_audio(video_path)
    # NOTE: uploading the audio to R2 is currently disabled; the local file
    # handle is passed to Replicate directly instead of a URL.
    try:
        with open(audio_file, "rb") as f:
            # NOTE(review): the task requested is speech-to-speech translation
            # (S2ST) although this function is named for text extraction --
            # confirm that callers read the text part of the model output.
            result = replicate_client.run(
                "cjwbw/seamless_communication:668a4fec05a887143e5fe8d45df25ec4c794dd43169b9a11562309b2d45873b0",
                input={
                    "task_name": "S2ST (Speech to Speech translation)",
                    "input_audio": f,
                    "input_text_language": "None",
                    "max_input_audio_length": max_duration,
                    "target_language_text_only": "English",
                    "target_language_with_speech": "English",
                },
            )
    finally:
        # Best-effort cleanup: a failure to delete the audio file must not
        # mask the model result or the original exception. Only filesystem
        # errors are ignored.
        try:
            os.remove(audio_file)
        except OSError:
            pass
    return result if result else {"error": "No output"}