# Sepia / app.py — text-to-video generator Space by Afeezee (commit a510082).
# NOTE: the original paste carried Hugging Face page residue (author/avatar/
# commit lines) above the imports; it is kept here as comments so the module
# remains valid Python.
import os
import gradio as gr
from cerebras.cloud.sdk import Cerebras
from gtts import gTTS
import assemblyai as aai
from moviepy import VideoFileClip,concatenate_videoclips, AudioFileClip, TextClip, CompositeVideoClip
import requests
# Initialize Cerebras client
# API keys are read from environment variables (set as Space secrets):
#   Ckey -> Cerebras, Pkey -> Pexels, Akey -> AssemblyAI.
# Any of these may be None if the variable is unset; the failure then
# surfaces later at the first API call, not here.
Cerekey = os.getenv("Ckey")
client = Cerebras(api_key= Cerekey)
# Pexels API key
pexkey = os.getenv("Pkey")
PEXELS_API_KEY = pexkey
# assembly AI API key
asskey = os.getenv("Akey")
aai.settings.api_key = asskey
# Modify the system prompt to include the estimated word count based on video duration
def generate_script(prompt, max_duration):
    """Ask the Cerebras LLM for a short poetic narration of *prompt*.

    The system message instructs the model to keep the narration short
    enough to be read aloud within *max_duration* seconds.

    Returns the narration text produced by the model.
    """
    system_message = f"You are an expert video content creator and narration writer who is proficient in generating narration from user prompts and crafting a concise and poetic narration that aligns with the prompt. Craft a concise, poetic narration for the prompt. Go straight to the narration, don't write a foreward or a description of your action. The narration should be suitable for a video that can be read in less than {max_duration} seconds."
    # Non-streaming completion: the full narration comes back in one response.
    completion = client.chat.completions.create(
        model="llama-3.3-70b",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ],
        stream=False,
        max_completion_tokens=1024,
        temperature=0.7,
        top_p=1,
    )
    return completion.choices[0].message.content
def search_and_download_videos(query, max_duration, aspect_ratio, download_folder, max_results=6):
    """Search Pexels for videos matching *query* and download the suitable ones.

    A video qualifies when its duration is at most *max_duration* seconds and
    its orientation ("landscape", "portrait" or "square", derived from the
    reported width/height) equals *aspect_ratio*. Qualifying files are saved
    as "<id>.mp4" inside *download_folder* (created if missing).

    Returns the list of downloaded file paths; on any HTTP error an empty
    list is returned and the error is printed (best-effort behavior kept).
    """
    url = "https://api.pexels.com/videos/search"
    headers = {"Authorization": PEXELS_API_KEY}
    params = {"query": query, "per_page": max_results}
    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        videos = response.json().get("videos", [])
        os.makedirs(download_folder, exist_ok=True)
        downloaded_files = []
        for video in videos:
            duration = video.get("duration")
            width = video.get("width")
            height = video.get("height")
            video_files = video.get("video_files") or []
            # Skip entries missing the metadata we filter on: the original
            # code crashed on duration=None and on an empty video_files list.
            if duration is None or not width or not height or not video_files:
                continue
            if width > height:
                video_aspect_ratio = "landscape"
            elif height > width:
                video_aspect_ratio = "portrait"
            else:
                video_aspect_ratio = "square"
            if duration <= max_duration and video_aspect_ratio == aspect_ratio:
                video_url = video_files[0]["link"]
                video_filename = os.path.join(download_folder, f"{video['id']}.mp4")
                # Stream the download, fail fast on HTTP errors, and make
                # sure the connection is released (previously leaked).
                with requests.get(video_url, stream=True) as video_response:
                    video_response.raise_for_status()
                    with open(video_filename, "wb") as file:
                        for chunk in video_response.iter_content(chunk_size=1024):
                            file.write(chunk)
                downloaded_files.append(video_filename)
        return downloaded_files
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return []
def generate_narration(script, output_file="narration.mp3"):
    """Synthesize *script* to English speech via gTTS and save it as MP3.

    Returns the path the audio was written to (default "narration.mp3").
    """
    speech = gTTS(script, lang="en")
    speech.save(output_file)
    return output_file
def load_videos_from_folder(folder_path):
    """Return the paths of video files found directly inside *folder_path*.

    Files are matched by extension (.mp4/.mov/.avi/.mkv). The match is
    case-insensitive, so camera-produced names like "CLIP.MP4" are also
    picked up (the original check was case-sensitive and skipped them).
    Prints an error and returns [] when the folder does not exist.
    """
    if not os.path.exists(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return []
    video_extensions = ('.mp4', '.mov', '.avi', '.mkv')
    return [
        os.path.join(folder_path, file)
        for file in os.listdir(folder_path)
        if file.lower().endswith(video_extensions)
    ]
def aggregate_videos(clips):
    """Concatenate *clips* into a single video; None when there is nothing to join."""
    if clips:
        # "compose" keeps differently-sized clips by compositing on a canvas.
        return concatenate_videoclips(clips, method="compose")
    return None
def trim_video_to_audio_length(final_video, audio_length):
    """Clip *final_video* down to *audio_length* seconds when it runs longer.

    A video that already fits is returned unchanged.
    """
    if final_video.duration <= audio_length:
        return final_video
    # subclipped() is the MoviePy 2.x trimming API.
    return final_video.subclipped(0, audio_length)
# Function to add narration to the final video
def add_narration_to_video(final_video, narration_path):
    """Attach the narration audio at *narration_path* to *final_video*.

    The narration's duration is adjusted to match the video's duration.
    When the audio file is missing, the video is returned untouched.
    """
    if not os.path.exists(narration_path):
        return final_video
    narration = AudioFileClip(narration_path).with_duration(final_video.duration)
    return final_video.with_audio(narration)
def save_final_video(final_video, output_path):
    """Encode *final_video* to *output_path* (H.264 video, AAC audio, fast preset)."""
    final_video.write_videofile(
        output_path,
        codec="libx264",
        audio_codec="aac",
        preset="ultrafast",
    )
def split_text_into_lines(data):
    """Group word-level timestamps into subtitle lines.

    *data* is a list of dicts with "word", "start" and "end" (seconds).
    A new line starts when the current one would exceed MaxChars characters
    or already spans more than MaxDuration seconds, or when the pause before
    the next word exceeds MaxGap seconds.

    Returns a list of dicts: joined "word" text, line "start"/"end" times,
    and the constituent word dicts under "textcontents".
    """
    MaxChars = 40       # max characters per subtitle line
    MaxDuration = 2.5   # max seconds a line may span
    MaxGap = 1.5        # pause length that forces a line break
    subtitles = []
    line = []
    line_duration = 0
    line_chars = 0
    for idx, wd in enumerate(data):
        # Start a new line if too many chars or too long a duration.
        # Guard on `line`: without it, a single word longer than MaxChars
        # made line[0] index an empty list (IndexError in the original).
        if line and ((line_chars + len(wd['word']) > MaxChars) or (line_duration > MaxDuration)):
            subtitles.append({
                "word": " ".join(w['word'] for w in line),
                "start": line[0]['start'],
                "end": line[-1]['end'],
                "textcontents": line
            })
            line = []
            line_chars = 0
            line_duration = 0
        line.append(wd)
        line_chars += len(wd['word'])
        # Duration spans from the line's first word to the current word.
        line_duration = wd['end'] - line[0]['start']
        # Also split on a long pause before the next word.
        if idx < len(data) - 1 and data[idx + 1]['start'] - wd['end'] > MaxGap:
            subtitles.append({
                "word": " ".join(w['word'] for w in line),
                "start": line[0]['start'],
                "end": wd['end'],
                "textcontents": line
            })
            line = []
            line_chars = 0
            line_duration = 0
    # Flush whatever is left over after the last word.
    if line:
        subtitles.append({
            "word": " ".join(w['word'] for w in line),
            "start": line[0]['start'],
            "end": line[-1]['end'],
            "textcontents": line
        })
    return subtitles
def generate_video(
    prompt: str,
    max_duration: int,
    aspect_ratio: str,
    download_folder: str = "downloaded_videos",
    max_results: int = 6
):
    """End-to-end pipeline: text prompt -> narrated, subtitled video.

    Steps: generate a narration script, download matching Pexels clips,
    concatenate them, attach TTS narration trimmed to the audio length,
    transcribe the narration for word-level timings, and burn in subtitles.

    Returns (narration_audio_path, video_path, script) on success, or
    (error_message, None, script) when no usable footage was obtained.
    """
    # 1. Generate the narration script
    script = generate_script(prompt, max_duration)

    # 2. Search & download Pexels videos
    videos = search_and_download_videos(
        prompt, max_duration, aspect_ratio, download_folder, max_results
    )
    if not videos:
        return "No videos were downloaded.", None, script

    # 3. Load and concatenate the downloaded clips
    video_clips = [VideoFileClip(path) for path in videos]
    final_video = aggregate_videos(video_clips)
    if final_video is None:
        return "Error generating video.", None, script

    # 4. Generate TTS narration and attach the audio
    narration_file = generate_narration(script)
    audio_len = AudioFileClip(narration_file).duration
    final_video = trim_video_to_audio_length(final_video, audio_len)
    final_video = add_narration_to_video(final_video, narration_file)

    # 5. Transcribe the narration for word-level timings
    # (AssemblyAI reports times in milliseconds; convert to seconds.)
    transcript = aai.Transcriber().transcribe(narration_file)
    wordlevel_info = [
        {"word": w.text, "start": w.start / 1000.0, "end": w.end / 1000.0}
        for w in transcript.words
    ]

    # 6. Split word timestamps into line-level subtitles
    linelevel_subs = split_text_into_lines(wordlevel_info)

    # 7. Build subtitle overlays (static line + per-word highlight)
    all_clips = [final_video] + _build_subtitle_clips(final_video, linelevel_subs)

    # 8. Composite all clips and export.
    # MoviePy 2.x renamed set_audio -> with_audio (set_audio was removed).
    fw, fh = final_video.size
    subtitled = CompositeVideoClip(all_clips, size=(fw, fh)).with_audio(final_video.audio)
    output_path = "final_with_subtitles.mp4"
    subtitled.write_videofile(
        output_path,
        fps=24,
        codec="libx264",
        audio_codec="aac",
        preset="ultrafast"
    )
    # Return TTS audio path, final video path, and the script
    return narration_file, output_path, script


def _build_subtitle_clips(final_video, linelevel_subs):
    """Build the subtitle TextClips: a white static line plus yellow word highlights.

    Uses the MoviePy 2.x API — TextClip(text=..., font_size=...) and the
    with_start/with_duration/with_position chainers — consistent with the
    rest of the file (subclipped, with_audio); the 1.x set_* methods and
    `fontsize=` used previously no longer exist in 2.x.
    """
    fw, fh = final_video.size
    # NOTE(review): MoviePy 2.x expects `font` to be a path to a .ttf/.otf
    # file; the bare name "Helvetica" may fail on the Space — confirm.
    font, fs, ypos = "Helvetica", 44, fh - 64
    clips = []
    for line in linelevel_subs:
        # Static full-line text, horizontally centred near the bottom.
        txt = TextClip(
            text=line["word"],
            font=font,
            font_size=fs,
            color="white",
            stroke_color="black",
            stroke_width=1
        )
        x0 = (fw - txt.w) / 2
        clips.append(
            txt
            .with_start(line["start"])
            .with_duration(line["end"] - line["start"])
            .with_position((x0, ypos))
        )
        # Word-by-word yellow highlight drawn over the static line.
        cursor = x0
        for wd in line["textcontents"]:
            wc = TextClip(
                text=wd["word"],
                font=font,
                font_size=fs,
                color="yellow",
                stroke_color="black",
                stroke_width=1
            )
            clips.append(
                wc
                .with_start(wd["start"])
                .with_duration(wd["end"] - wd["start"])
                .with_position((cursor, ypos))
            )
            # Advance the cursor by the rendered width of "word + space".
            cursor += TextClip(text=wd["word"] + " ", font=font, font_size=fs).w
    return clips
# Gradio UI: wires generate_video's three parameters (prompt, length,
# aspect ratio) to its three return values (narration audio, video, script).
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Enter Text Prompt", placeholder="Enter the text to generate the video script."),
        # Slider caps the requested length at 30 seconds (default 10).
        gr.Slider(minimum=1, maximum=30, step=1, label="Video Length (seconds)", value=10),
        gr.Radio(choices=["portrait", "landscape", "square"], label="Select Aspect Ratio", value="landscape"),
    ],
    outputs=[
        gr.Audio(label="Narration Audio"),
        gr.Video(label="Generated Video"),
        gr.Textbox(label="Generated Script", interactive=False)
    ],
    title="Sepia Text-to-Video Generator",
    description="Enter a text prompt, specify the length of the video (maximum 30 seconds), select the aspect ratio, and click 'Submit' to get the narrated audio, the video and the script.",
    live=False
)
# debug=True surfaces tracebacks in the Space logs.
iface.launch(debug=True)