Afeezee committed on
Commit
83aa67f
·
verified ·
1 Parent(s): 34e04a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -0
app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from cerebras.cloud.sdk import Cerebras
4
+ from gtts import gTTS
5
+ import assemblyai as aai
6
+ from moviepy import VideoFileClip,concatenate_videoclips, AudioFileClip, TextClip, CompositeVideoClip
7
+ import requests
8
+
9
# --- Service credentials: every key is read from an environment variable ---

# Cerebras client used for chat completions (key taken from "Ckey").
Cerekey = os.environ.get("Ckey")
client = Cerebras(api_key=Cerekey)

# Pexels key sent as the Authorization header of video searches (from "Pkey").
PEXELS_API_KEY = pexkey = os.environ.get("Pkey")

# AssemblyAI key, installed globally on the SDK settings object (from "Akey").
asskey = os.environ.get("Akey")
aai.settings.api_key = asskey
20
+
21
def generate_script(prompt, max_duration):
    """Ask the Cerebras chat model for a short narration of *prompt*.

    The system message tells the model to write a concise, poetic narration
    that can be read in less than ``max_duration`` seconds; the user's prompt
    is sent unchanged as the user message.

    Args:
        prompt: Free-form text describing what the video is about.
        max_duration: Reading-time budget in seconds, interpolated into the
            system message.

    Returns:
        The message content of the first completion choice.
    """
    instructions = (
        "You are an expert video content creator and narration writer who is "
        "proficient in generating narration from user prompts and crafting a "
        "concise and poetic narration that aligns with the prompt. Craft a "
        "concise, poetic narration for the prompt. Go straight to the "
        "narration, don't write a foreward or a description of your action. "
        "The narration should be suitable for a video that can be read in "
        f"less than {max_duration} seconds."
    )
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt},
    ]

    # stream=False: the call returns one complete response object.
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b",
        stream=False,
        max_completion_tokens=1024,
        temperature=0.7,
        top_p=1,
    )
    return completion.choices[0].message.content
34
+
35
+
36
def search_and_download_videos(query, max_duration, aspect_ratio, download_folder, max_results=6):
    """Search Pexels for videos and download the ones that match the filters.

    A video is kept when its reported duration is at most ``max_duration`` and
    its orientation (derived from the reported width/height) equals
    ``aspect_ratio``. The first entry of the video's ``video_files`` list is
    the rendition that gets downloaded.

    Args:
        query: Search text sent to the Pexels video search endpoint.
        max_duration: Maximum accepted video duration, in the unit reported
            by the API's ``duration`` field.
        aspect_ratio: One of ``"landscape"``, ``"portrait"`` or ``"square"``.
        download_folder: Directory the files are written to (created when
            missing).
        max_results: Number of search results requested (``per_page``).

    Returns:
        List of paths of the files that were downloaded successfully. Empty
        when the search request fails or no video matches.
    """
    url = "https://api.pexels.com/videos/search"
    headers = {"Authorization": PEXELS_API_KEY}
    params = {"query": query, "per_page": max_results}
    # Without a timeout a stalled connection would block the caller forever.
    request_timeout = 30

    try:
        response = requests.get(url, headers=headers, params=params, timeout=request_timeout)
        response.raise_for_status()
        videos = response.json().get("videos", [])
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return []

    os.makedirs(download_folder, exist_ok=True)

    downloaded_files = []
    for video in videos:
        duration = video.get("duration")
        width = video.get("width")
        height = video.get("height")
        video_id = video.get("id")
        # Entries with missing metadata cannot be classified; skip them
        # instead of comparing against None or a stale orientation.
        if not (width and height) or duration is None or video_id is None:
            continue

        if width > height:
            video_aspect_ratio = "landscape"
        elif height > width:
            video_aspect_ratio = "portrait"
        else:
            video_aspect_ratio = "square"
        if duration > max_duration or video_aspect_ratio != aspect_ratio:
            continue

        video_files = video.get("video_files") or []
        video_url = video_files[0].get("link") if video_files else None
        if not video_url:
            continue

        video_filename = os.path.join(download_folder, f"{video_id}.mp4")
        try:
            # The context manager releases the streamed connection.
            with requests.get(video_url, stream=True, timeout=request_timeout) as video_response:
                # Fail here rather than saving an HTTP error body as an .mp4.
                video_response.raise_for_status()
                with open(video_filename, "wb") as file:
                    for chunk in video_response.iter_content(chunk_size=64 * 1024):
                        file.write(chunk)
        except requests.exceptions.RequestException as e:
            # One bad download should not discard the clips already fetched.
            print(f"Error: {e}")
            if os.path.exists(video_filename):
                os.remove(video_filename)
            continue

        downloaded_files.append(video_filename)
    return downloaded_files
70
+
71
+
72
def generate_narration(script, output_file="narration.mp3"):
    """Synthesize *script* as English speech with gTTS and save it.

    Args:
        script: Text to be spoken.
        output_file: Destination path of the generated audio file.

    Returns:
        ``output_file``, unchanged, so the call can be used inline.
    """
    gTTS(script, lang="en").save(output_file)
    return output_file
76
+
77
+
78
def load_videos_from_folder(folder_path):
    """List the video files directly inside *folder_path*.

    Files are recognised by extension (``.mp4``, ``.mov``, ``.avi``, ``.mkv``),
    compared case-insensitively. Sub-directories are ignored even if their
    name looks like a video file.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        Paths (``folder_path`` joined with the file name) sorted by file
        name so the order is reproducible; an empty list, after printing an
        error, when ``folder_path`` is not an existing directory.
    """
    # isdir rather than exists: a path to a regular file would otherwise
    # make os.listdir raise.
    if not os.path.isdir(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return []

    video_extensions = ('.mp4', '.mov', '.avi', '.mkv')
    video_files = []
    # os.listdir order is arbitrary; sort for a stable result.
    for name in sorted(os.listdir(folder_path)):
        path = os.path.join(folder_path, name)
        if name.lower().endswith(video_extensions) and os.path.isfile(path):
            video_files.append(path)
    return video_files
89
+
90
+
91
def aggregate_videos(clips):
    """Join *clips* back to back into a single clip.

    Args:
        clips: Sequence of video clips, in playback order.

    Returns:
        The result of ``concatenate_videoclips(clips, method="compose")``,
        or ``None`` when ``clips`` is empty or ``None``.
    """
    return concatenate_videoclips(clips, method="compose") if clips else None
95
+
96
+
97
def trim_video_to_audio_length(final_video, audio_length):
    """Cut *final_video* down to *audio_length* when it runs longer.

    Args:
        final_video: Clip exposing ``duration`` and ``subclipped``.
        audio_length: Target length, compared against ``final_video.duration``.

    Returns:
        ``final_video.subclipped(0, audio_length)`` when the clip is longer
        than ``audio_length``; otherwise the clip itself, untouched.
    """
    exceeds_audio = final_video.duration > audio_length
    if not exceeds_audio:
        return final_video
    return final_video.subclipped(0, audio_length)
102
+
103
+
104
+
105
+ # Function to add narration to the final video
106
def add_narration_to_video(final_video, narration_path):
    """Attach the audio file at *narration_path* to *final_video*.

    Args:
        final_video: Clip exposing ``duration`` and ``with_audio``.
        narration_path: Path of the narration audio file.

    Returns:
        A clip carrying the narration, with the audio's duration set to the
        video's duration; ``final_video`` unchanged when the file does not
        exist.
    """
    if not os.path.exists(narration_path):
        return final_video

    # Give the audio the video's duration before attaching it.
    narration = AudioFileClip(narration_path).with_duration(final_video.duration)
    return final_video.with_audio(narration)
112
+
113
+
114
+
115
def save_final_video(final_video, output_path):
    """Write *final_video* to *output_path* as H.264 video with AAC audio.

    Args:
        final_video: Clip exposing ``write_videofile``.
        output_path: Destination file path.
    """
    encoding_options = {
        "codec": "libx264",
        "audio_codec": "aac",
        "preset": "ultrafast",
    }
    final_video.write_videofile(output_path, **encoding_options)
117
+
118
def _make_subtitle_line(words):
    """Build one subtitle entry from a non-empty list of word dicts."""
    return {
        "word": " ".join(w['word'] for w in words),
        "start": words[0]['start'],
        "end": words[-1]['end'],
        "textcontents": words,
    }


def split_text_into_lines(data, max_chars=40, max_duration=2.5, max_gap=1.5):
    """Group word-level timestamps into subtitle lines.

    A new line is started before a word when adding it would push the line's
    character count (spaces not counted) past ``max_chars``, or when the
    line already spans more than ``max_duration``. A line is also closed
    after a word when the pause before the next word exceeds ``max_gap``.

    Args:
        data: Sequence of dicts with keys ``"word"``, ``"start"`` and
            ``"end"`` (start/end in the same time unit as the limits).
        max_chars: Maximum characters per line, excluding joining spaces.
        max_duration: Line time span beyond which the next word starts a
            new line.
        max_gap: Silence between consecutive words that forces a line break.

    Returns:
        List of dicts with ``"word"`` (the joined line text), ``"start"``,
        ``"end"`` and ``"textcontents"`` (the word dicts of that line).
    """
    subtitles = []
    line = []
    line_duration = 0
    line_chars = 0
    last_index = len(data) - 1

    for idx, wd in enumerate(data):
        too_wide = line_chars + len(wd['word']) > max_chars
        too_long = line_duration > max_duration
        # `line` is empty right after a flush (or at the very start); a
        # single word longer than max_chars must then open a line of its
        # own instead of flushing an empty one (which used to raise
        # IndexError on line[0]).
        if line and (too_wide or too_long):
            subtitles.append(_make_subtitle_line(line))
            line = []
            line_chars = 0
            line_duration = 0

        line.append(wd)
        line_chars += len(wd['word'])
        line_duration = wd['end'] - line[0]['start']

        # Also split on a long pause before the next word.
        if idx < last_index and data[idx + 1]['start'] - wd['end'] > max_gap:
            subtitles.append(_make_subtitle_line(line))
            line = []
            line_chars = 0
            line_duration = 0

    if line:
        subtitles.append(_make_subtitle_line(line))

    return subtitles
166
+
167
def _build_subtitle_clips(lines, frame_width, ypos, font, font_size):
    """Create the text overlays (white line + yellow per-word highlight) for *lines*."""
    clips = []
    for line in lines:
        # Static full-line text, horizontally centred.
        # MoviePy 2.x: text is passed as `text=` and the size as `font_size=`.
        txt = TextClip(
            font=font,
            text=line["word"],
            font_size=font_size,
            color="white",
            method='label',
            stroke_color="black",
            stroke_width=1,
        )
        x0 = (frame_width - txt.w) / 2
        clips.append(
            txt
            .with_start(line["start"])
            .with_duration(line["end"] - line["start"])
            .with_position((x0, ypos))
        )

        # Word-by-word highlight drawn over the static line.
        cursor = x0
        for wd in line["textcontents"]:
            wc = TextClip(
                font=font,
                text=wd["word"],
                font_size=font_size,
                color="yellow",
                method='label',
                stroke_color="black",
                stroke_width=1,
            )
            clips.append(
                wc
                .with_start(wd["start"])
                .with_duration(wd["end"] - wd["start"])
                .with_position((cursor, ypos))
            )

            # Advance the cursor by the rendered width of the word plus a space.
            dummy = TextClip(font=font, text=wd["word"] + " ", font_size=font_size, method='label')
            cursor += dummy.w
    return clips


def generate_video(
    prompt: str,
    max_duration: int,
    aspect_ratio: str,
    download_folder: str = "downloaded_videos",
    max_results: int = 6
):
    """Build a narrated, subtitled video for *prompt*.

    Pipeline: generate a narration script, download matching Pexels clips,
    concatenate them, synthesize the narration, trim the video to the
    narration length, attach the audio, transcribe the narration for
    word-level timings, overlay subtitles and export the result to
    ``final_with_subtitles.mp4``.

    Args:
        prompt: Text used both for the script and as the video search query.
        max_duration: Reading-time budget for the script and maximum length
            of each downloaded clip.
        aspect_ratio: ``"landscape"``, ``"portrait"`` or ``"square"``.
        download_folder: Directory for downloaded clips.
        max_results: Number of search results requested.

    Returns:
        ``(narration_path, video_path, script)`` on success. When no clip
        could be downloaded or concatenated, ``(message, None, script)``.
    """
    # 1. Generate the narration script
    script = generate_script(prompt, max_duration)

    # 2. Search & download Pexels videos
    videos = search_and_download_videos(
        prompt, max_duration, aspect_ratio, download_folder, max_results
    )
    if not videos:
        # NOTE(review): the first slot feeds the gr.Audio output, which
        # expects a file path — confirm this message renders as intended.
        return "No videos were downloaded.", None, script

    video_clips = []
    try:
        # 3. Load and concatenate downloaded clips
        for path in videos:
            video_clips.append(VideoFileClip(path))
        final_video = aggregate_videos(video_clips)
        if final_video is None:
            return "Error generating video.", None, script

        # 4. Generate TTS narration and attach audio
        narration_file = generate_narration(script)
        # This clip is only opened to read the duration; close it right away.
        duration_probe = AudioFileClip(narration_file)
        try:
            audio_len = duration_probe.duration
        finally:
            duration_probe.close()
        final_video = trim_video_to_audio_length(final_video, audio_len)
        final_video = add_narration_to_video(final_video, narration_file)

        # 5. Transcribe narration for word-level timings
        transcript = aai.Transcriber().transcribe(narration_file)
        # Timestamps are divided by 1000 (presumably milliseconds -> seconds).
        # `words` is guarded because it may be None, presumably when the
        # transcription fails; the video is then exported without subtitles.
        wordlevel_info = [
            {
                "word": w.text,
                "start": w.start / 1000.0,
                "end": w.end / 1000.0
            }
            for w in (transcript.words or [])
        ]

        # 6. Split word timestamps into line-level subtitles
        linelevel_subs = split_text_into_lines(wordlevel_info)

        # 7. Build subtitle clips (static + highlights)
        fw, fh = final_video.size
        # NOTE(review): MoviePy 2.x treats `font` as a font file path —
        # confirm that "Helvetica" resolves on the deployment host.
        font, fs, ypos = "Helvetica", 44, fh - 64
        all_clips = [final_video]
        all_clips.extend(_build_subtitle_clips(linelevel_subs, fw, ypos, font, fs))

        # 8. Composite all clips and export
        subtitled = CompositeVideoClip(all_clips, size=(fw, fh)) \
            .with_audio(final_video.audio)
        output_path = "final_with_subtitles.mp4"
        subtitled.write_videofile(
            output_path,
            fps=24,
            codec="libx264",
            audio_codec="aac",
            preset="ultrafast"
        )
    finally:
        # Release the source clips on every exit path.
        for clip in video_clips:
            clip.close()

    # Return TTS audio path, final video path, and the script
    return narration_file, output_path, script
273
+
274
# Input widgets, in the positional order generate_video receives them:
# prompt, max_duration, aspect_ratio.
_input_components = [
    gr.Textbox(
        label="Enter Text Prompt",
        placeholder="Enter the text to generate the video script.",
    ),
    gr.Slider(
        minimum=1,
        maximum=30,
        step=1,
        label="Video Length (seconds)",
        value=10,
    ),
    gr.Radio(
        choices=["portrait", "landscape", "square"],
        label="Select Aspect Ratio",
        value="landscape",
    ),
]

# Output widgets, matching generate_video's return tuple:
# narration audio path, video path, script text.
_output_components = [
    gr.Audio(label="Narration Audio"),
    gr.Video(label="Generated Video"),
    gr.Textbox(label="Generated Script", interactive=False),
]

iface = gr.Interface(
    fn=generate_video,
    inputs=_input_components,
    outputs=_output_components,
    title="Sepia Text-to-Video Generator",
    description="Enter a text prompt, specify the length of the video (maximum 30 seconds), select the aspect ratio, and click 'Submit' to get the narrated audio, the video and the script.",
    live=False,
)

# Start the web UI as soon as the script runs.
iface.launch(debug=True)