Spaces:
Sleeping
Sleeping
File size: 10,035 Bytes
43060fa a510082 43060fa a510082 43060fa a510082 43060fa | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 | import os
import gradio as gr
from cerebras.cloud.sdk import Cerebras
from gtts import gTTS
import assemblyai as aai
from moviepy import VideoFileClip,concatenate_videoclips, AudioFileClip, TextClip, CompositeVideoClip
import requests
# --- API credentials, all read from environment variables ---
# Cerebras LLM client; used by generate_script for narration text.
# NOTE(review): os.getenv returns None when the variable is unset, so a
# missing key only fails later at request time — confirm keys are configured.
Cerekey = os.getenv("Ckey")
client = Cerebras(api_key= Cerekey)
# Pexels stock-video API key; used by search_and_download_videos.
pexkey = os.getenv("Pkey")
PEXELS_API_KEY = pexkey
# AssemblyAI API key; used for word-level transcription of the narration.
asskey = os.getenv("Akey")
aai.settings.api_key = asskey
# The system prompt below embeds the target duration so the model sizes the narration.
def generate_script(prompt, max_duration):
    """Ask the Cerebras LLM for a short narration script for *prompt*.

    The system prompt caps the narration so it can be read aloud in under
    ``max_duration`` seconds.  Returns the narration text as a string.
    """
    system_message = f"You are an expert video content creator and narration writer who is proficient in generating narration from user prompts and crafting a concise and poetic narration that aligns with the prompt. Craft a concise, poetic narration for the prompt. Go straight to the narration, don't write a foreward or a description of your action. The narration should be suitable for a video that can be read in less than {max_duration} seconds."
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    # Non-streaming call: the full completion comes back in one response.
    completion = client.chat.completions.create(
        messages=messages,
        model="llama-3.3-70b",
        stream=False,
        max_completion_tokens=1024,
        temperature=0.7,
        top_p=1,
    )
    return completion.choices[0].message.content
def search_and_download_videos(query, max_duration, aspect_ratio, download_folder, max_results=6):
    """Search Pexels for videos matching *query* and download the suitable ones.

    A video is kept when its duration is at most ``max_duration`` seconds and
    its orientation (landscape / portrait / square, derived from width vs.
    height) equals ``aspect_ratio``.  Each match is saved into
    ``download_folder`` as ``<video-id>.mp4``.

    Returns the list of downloaded file paths, or [] on any request error.
    """
    url = "https://api.pexels.com/videos/search"
    headers = {"Authorization": PEXELS_API_KEY}
    params = {"query": query, "per_page": max_results}
    try:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        videos = response.json().get("videos", [])
        os.makedirs(download_folder, exist_ok=True)
        downloaded_files = []
        for video in videos:
            duration = video.get("duration")
            width = video.get("width")
            height = video.get("height")
            files = video.get("video_files") or []
            # Skip entries with missing metadata: comparing a None duration
            # against an int would raise TypeError, and the original code
            # could read an unbound/stale aspect ratio when width/height
            # were absent.
            if duration is None or not width or not height or not files:
                continue
            if width > height:
                video_aspect_ratio = "landscape"
            elif height > width:
                video_aspect_ratio = "portrait"
            else:
                video_aspect_ratio = "square"
            if duration <= max_duration and video_aspect_ratio == aspect_ratio:
                video_url = files[0]["link"]
                video_filename = os.path.join(download_folder, f"{video['id']}.mp4")
                video_response = requests.get(video_url, stream=True)
                # Don't save an HTTP error body as a .mp4 file.
                video_response.raise_for_status()
                with open(video_filename, "wb") as fh:
                    for chunk in video_response.iter_content(chunk_size=1024):
                        fh.write(chunk)
                downloaded_files.append(video_filename)
        return downloaded_files
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return []
def generate_narration(script, output_file="narration.mp3"):
    """Render *script* to English speech with gTTS and save it as an MP3.

    Returns the path of the written audio file (``output_file``).
    """
    speech = gTTS(script, lang="en")
    speech.save(output_file)
    return output_file
def load_videos_from_folder(folder_path):
    """List the video files found directly inside *folder_path*.

    Extensions (.mp4/.mov/.avi/.mkv) are matched case-insensitively, so
    camera-produced ``.MP4``/``.MOV`` files are picked up too, and the result
    is sorted so the downstream clip order is deterministic.

    Returns [] (after printing an error) when the folder does not exist.
    """
    if not os.path.exists(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return []
    extensions = ('.mp4', '.mov', '.avi', '.mkv')
    return sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.lower().endswith(extensions)
    )
def aggregate_videos(clips):
    """Concatenate *clips* into one video; return None for an empty/missing list."""
    if clips:
        return concatenate_videoclips(clips, method="compose")
    return None
def trim_video_to_audio_length(final_video, audio_length):
    """Cut *final_video* down to *audio_length* seconds when it runs longer.

    A video already at or below the audio length is returned untouched.
    Trimming uses MoviePy 2.x's ``subclipped``.
    """
    if final_video.duration <= audio_length:
        return final_video
    return final_video.subclipped(0, audio_length)
def add_narration_to_video(final_video, narration_path):
    """Attach the narration audio file at *narration_path* to *final_video*.

    The audio duration is forced (``with_duration``) to match the video's.
    When the file is missing, the video is returned with its audio untouched.
    """
    if not os.path.exists(narration_path):
        return final_video
    narration = AudioFileClip(narration_path).with_duration(final_video.duration)
    return final_video.with_audio(narration)
def save_final_video(final_video, output_path):
    """Encode *final_video* to *output_path* (H.264 video + AAC audio).

    The "ultrafast" preset trades output size for encoding speed.
    """
    final_video.write_videofile(
        output_path,
        codec="libx264",
        audio_codec="aac",
        preset="ultrafast",
    )
def split_text_into_lines(data):
    """Group word-level timestamps into line-level subtitle entries.

    *data* is a list of dicts with ``word``, ``start`` and ``end`` keys (times
    in seconds).  A new line starts when the current one would exceed
    MAX_CHARS characters or MAX_DURATION seconds, or after a silence longer
    than MAX_GAP seconds.  Each returned entry carries the joined ``word``
    text, the line's ``start``/``end`` times, and the original word dicts
    under ``textcontents``.
    """
    MAX_CHARS = 40
    MAX_DURATION = 2.5
    MAX_GAP = 1.5

    subtitles = []
    line = []
    line_chars = 0
    line_duration = 0

    def flush(end_time):
        # Emit the current line as one subtitle entry.
        subtitles.append({
            "word": " ".join(w['word'] for w in line),
            "start": line[0]['start'],
            "end": end_time,
            "textcontents": line,
        })

    for idx, wd in enumerate(data):
        # Start a new line when this word would overflow the current one.
        # The `line` guard fixes an IndexError in the original: a single word
        # longer than MAX_CHARS arriving on an empty line (e.g. right after a
        # gap split) triggered the flush with line[0] on an empty list.
        if line and ((line_chars + len(wd['word']) > MAX_CHARS) or (line_duration > MAX_DURATION)):
            flush(line[-1]['end'])
            line = []
            line_chars = 0
            line_duration = 0
        line.append(wd)
        line_chars += len(wd['word'])
        line_duration = wd['end'] - line[0]['start']
        # Also break the line on a long pause before the next word.
        if idx < len(data) - 1 and data[idx + 1]['start'] - wd['end'] > MAX_GAP:
            flush(wd['end'])
            line = []
            line_chars = 0
            line_duration = 0
    if line:
        flush(line[-1]['end'])
    return subtitles
def generate_video(
    prompt: str,
    max_duration: int,
    aspect_ratio: str,
    download_folder: str = "downloaded_videos",
    max_results: int = 6
):
    """End-to-end pipeline: prompt -> script -> stock clips -> narrated,
    subtitled video.

    Returns ``(narration_audio_path, video_path, script)`` on success, or an
    error-message string in place of the audio path (with ``None`` video)
    when no usable footage was found.
    """
    # 1. Generate the narration script.
    script = generate_script(prompt, max_duration)

    # 2. Search & download Pexels videos matching the prompt.
    videos = search_and_download_videos(
        prompt, max_duration, aspect_ratio, download_folder, max_results
    )
    if not videos:
        return "No videos were downloaded.", None, script

    # 3. Load and concatenate the downloaded clips.
    video_clips = [VideoFileClip(path) for path in videos]
    final_video = aggregate_videos(video_clips)
    if final_video is None:
        return "Error generating video.", None, script

    # 4. Generate TTS narration, trim the video to its length, attach it.
    narration_file = generate_narration(script)
    audio_len = AudioFileClip(narration_file).duration
    final_video = trim_video_to_audio_length(final_video, audio_len)
    final_video = add_narration_to_video(final_video, narration_file)

    # 5. Transcribe the narration for word-level timings (AssemblyAI reports
    #    milliseconds; convert to seconds for MoviePy).
    transcript = aai.Transcriber().transcribe(narration_file)
    wordlevel_info = [
        {"word": w.text, "start": w.start / 1000.0, "end": w.end / 1000.0}
        for w in transcript.words
    ]

    # 6. Group the word timestamps into line-level subtitles.
    linelevel_subs = split_text_into_lines(wordlevel_info)

    # 7. Build subtitle clips: a white static line plus per-word yellow
    #    highlights.  FIX: this file imports and uses the MoviePy 2.x API
    #    everywhere else (with_audio/with_duration/subclipped), but this
    #    section used the removed 1.x names (fontsize, set_start,
    #    set_duration, set_position, set_audio) and would crash — switched
    #    to the 2.x equivalents (text=/font_size=, with_start, with_duration,
    #    with_position, with_audio).
    fw, fh = final_video.size
    # NOTE(review): MoviePy 2.x expects `font` to be a font *file path*;
    # confirm "Helvetica" resolves on the deployment host.
    font, fs, ypos = "Helvetica", 44, fh - 64
    all_clips = [final_video]
    for line in linelevel_subs:
        # Static full-line text, horizontally centred.
        txt = TextClip(
            text=line["word"],
            font=font,
            font_size=fs,
            color="white",
            stroke_color="black",
            stroke_width=1
        )
        x0 = (fw - txt.w) / 2
        static = (
            txt
            .with_start(line["start"])
            .with_duration(line["end"] - line["start"])
            .with_position((x0, ypos))
        )
        all_clips.append(static)
        # Word-by-word highlight overlaid on the static line.
        cursor = x0
        for wd in line["textcontents"]:
            wc = TextClip(
                text=wd["word"],
                font=font,
                font_size=fs,
                color="yellow",
                stroke_color="black",
                stroke_width=1
            )
            hl = (
                wc
                .with_start(wd["start"])
                .with_duration(wd["end"] - wd["start"])
                .with_position((cursor, ypos))
            )
            all_clips.append(hl)
            # Advance the cursor by the rendered width of "word + space".
            dummy = TextClip(text=wd["word"] + " ", font=font, font_size=fs)
            cursor += dummy.w

    # 8. Composite everything (keeping the narration audio) and export.
    subtitled = CompositeVideoClip(all_clips, size=(fw, fh)).with_audio(final_video.audio)
    output_path = "final_with_subtitles.mp4"
    subtitled.write_videofile(
        output_path,
        fps=24,
        codec="libx264",
        audio_codec="aac",
        preset="ultrafast"
    )
    return narration_file, output_path, script
# Gradio front end: prompt, video length and aspect ratio in;
# narration audio, finished video and the generated script out.
prompt_box = gr.Textbox(
    label="Enter Text Prompt",
    placeholder="Enter the text to generate the video script."
)
length_slider = gr.Slider(
    minimum=1, maximum=30, step=1, label="Video Length (seconds)", value=10
)
ratio_radio = gr.Radio(
    choices=["portrait", "landscape", "square"],
    label="Select Aspect Ratio",
    value="landscape"
)
iface = gr.Interface(
    fn=generate_video,
    inputs=[prompt_box, length_slider, ratio_radio],
    outputs=[
        gr.Audio(label="Narration Audio"),
        gr.Video(label="Generated Video"),
        gr.Textbox(label="Generated Script", interactive=False)
    ],
    title="Sepia Text-to-Video Generator",
    description="Enter a text prompt, specify the length of the video (maximum 30 seconds), select the aspect ratio, and click 'Submit' to get the narrated audio, the video and the script.",
    live=False
)
iface.launch(debug=True)