# (Hugging Face Spaces page residue — app status "Sleeping" — kept as a comment so the file parses.)
import os

import assemblyai as aai
import gradio as gr
import requests
from cerebras.cloud.sdk import Cerebras
from gtts import gTTS
from moviepy import VideoFileClip, concatenate_videoclips, AudioFileClip, TextClip, CompositeVideoClip
| # Initialize Cerebras client | |
| Cerekey = os.getenv("Ckey") | |
| client = Cerebras(api_key= Cerekey) | |
| # Pexels API key | |
| pexkey = os.getenv("Pkey") | |
| PEXELS_API_KEY = pexkey | |
| # assembly AI API key | |
| asskey = os.getenv("Akey") | |
| aai.settings.api_key = asskey | |
| # Modify the system prompt to include the estimated word count based on video duration | |
| def generate_script(prompt, max_duration): | |
| system_message = f"You are an expert video content creator and narration writer who is proficient in generating narration from user prompts and crafting a concise and poetic narration that aligns with the prompt. Craft a concise, poetic narration for the prompt. Go straight to the narration, don't write a foreward or a description of your action. The narration should be suitable for a video that can be read in less than {max_duration} seconds." | |
| stream = client.chat.completions.create( | |
| messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}], | |
| model="llama-3.3-70b", | |
| stream=False, | |
| max_completion_tokens=1024, | |
| temperature=0.7, | |
| top_p=1 | |
| ) | |
| return stream.choices[0].message.content | |
| def search_and_download_videos(query, max_duration, aspect_ratio, download_folder, max_results=6): | |
| url = "https://api.pexels.com/videos/search" | |
| headers = {"Authorization": PEXELS_API_KEY} | |
| params = {"query": query, "per_page": max_results} | |
| try: | |
| response = requests.get(url, headers=headers, params=params) | |
| response.raise_for_status() | |
| videos = response.json().get("videos", []) | |
| if not os.path.exists(download_folder): | |
| os.makedirs(download_folder) | |
| downloaded_files = [] | |
| for video in videos: | |
| duration = video.get("duration") | |
| width = video.get("width") | |
| height = video.get("height") | |
| if width and height: | |
| video_aspect_ratio = "landscape" if width > height else "portrait" if height > width else "square" | |
| if duration <= max_duration and video_aspect_ratio == aspect_ratio: | |
| video_url = video["video_files"][0]["link"] | |
| video_id = video["id"] | |
| video_filename = os.path.join(download_folder, f"{video_id}.mp4") | |
| video_response = requests.get(video_url, stream=True) | |
| with open(video_filename, "wb") as file: | |
| for chunk in video_response.iter_content(chunk_size=1024): | |
| file.write(chunk) | |
| downloaded_files.append(video_filename) | |
| return downloaded_files | |
| except requests.exceptions.RequestException as e: | |
| print(f"Error: {e}") | |
| return [] | |
| def generate_narration(script, output_file="narration.mp3"): | |
| tts = gTTS(script, lang="en") | |
| tts.save(output_file) | |
| return output_file | |
| def load_videos_from_folder(folder_path): | |
| if not os.path.exists(folder_path): | |
| print(f"Error: The folder '{folder_path}' does not exist.") | |
| return [] | |
| video_files = [ | |
| os.path.join(folder_path, file) | |
| for file in os.listdir(folder_path) | |
| if file.endswith(('.mp4', '.mov', '.avi', '.mkv')) | |
| ] | |
| return video_files | |
| def aggregate_videos(clips): | |
| if not clips: | |
| return None | |
| return concatenate_videoclips(clips, method="compose") | |
| def trim_video_to_audio_length(final_video, audio_length): | |
| if final_video.duration > audio_length: | |
| # Use subclipped method for CompositeVideoClip | |
| final_video = final_video.subclipped(0, audio_length) | |
| return final_video | |
| # Function to add narration to the final video | |
| def add_narration_to_video(final_video, narration_path): | |
| if os.path.exists(narration_path): | |
| narration_audio = AudioFileClip(narration_path) | |
| narration_audio = narration_audio.with_duration(final_video.duration) # Adjust duration to match video | |
| final_video = final_video.with_audio(narration_audio) # Use with_audio instead of set_audio | |
| return final_video | |
| def save_final_video(final_video, output_path): | |
| final_video.write_videofile(output_path, codec="libx264", audio_codec="aac", preset="ultrafast") | |
| def split_text_into_lines(data): | |
| MaxChars = 40 | |
| MaxDuration = 2.5 | |
| MaxGap = 1.5 | |
| subtitles = [] | |
| line = [] | |
| line_duration = 0 | |
| line_chars = 0 | |
| for idx, wd in enumerate(data): | |
| # start a new line if too many chars or too long duration | |
| if (line_chars + len(wd['word']) > MaxChars) or (line_duration > MaxDuration): | |
| subtitles.append({ | |
| "word": " ".join(w['word'] for w in line), | |
| "start": line[0]['start'], | |
| "end": line[-1]['end'], | |
| "textcontents": line | |
| }) | |
| line = [] | |
| line_chars = 0 | |
| line_duration = 0 | |
| line.append(wd) | |
| line_chars += len(wd['word']) | |
| line_duration = wd['end'] - line[0]['start'] | |
| # also split on long pause | |
| if idx < len(data)-1 and data[idx+1]['start'] - wd['end'] > MaxGap: | |
| subtitles.append({ | |
| "word": " ".join(w['word'] for w in line), | |
| "start": line[0]['start'], | |
| "end": wd['end'], | |
| "textcontents": line | |
| }) | |
| line = [] | |
| line_chars = 0 | |
| line_duration = 0 | |
| if line: | |
| subtitles.append({ | |
| "word": " ".join(w['word'] for w in line), | |
| "start": line[0]['start'], | |
| "end": line[-1]['end'], | |
| "textcontents": line | |
| }) | |
| return subtitles | |
| def generate_video( | |
| prompt: str, | |
| max_duration: int, | |
| aspect_ratio: str, | |
| download_folder: str = "downloaded_videos", | |
| max_results: int = 6 | |
| ): | |
| # 1️⃣ Generate the narration script | |
| script = generate_script(prompt, max_duration) | |
| # 2️⃣ Search & download Pexels videos | |
| videos = search_and_download_videos( | |
| prompt, max_duration, aspect_ratio, download_folder, max_results | |
| ) | |
| if not videos: | |
| return "No videos were downloaded.", None, script | |
| # 3️⃣ Load and concatenate downloaded clips | |
| video_clips = [VideoFileClip(path) for path in videos] | |
| final_video = aggregate_videos(video_clips) | |
| if final_video is None: | |
| return "Error generating video.", None, script | |
| # 4️⃣ Generate TTS narration and attach audio | |
| narration_file = generate_narration(script) | |
| audio_len = AudioFileClip(narration_file).duration | |
| final_video = trim_video_to_audio_length(final_video, audio_len) | |
| final_video = add_narration_to_video(final_video, narration_file) | |
| # 5️⃣ Transcribe narration for word‑level timings | |
| transcript = aai.Transcriber().transcribe(narration_file) | |
| wordlevel_info = [ | |
| { | |
| "word": w.text, | |
| "start": w.start / 1000.0, | |
| "end": w.end / 1000.0 | |
| } | |
| for w in transcript.words | |
| ] | |
| # 6️⃣ Split word‑timestamps into line‑level subtitles | |
| linelevel_subs = split_text_into_lines(wordlevel_info) | |
| # 7️⃣ Build subtitle clips (static + highlights) | |
| fw, fh = final_video.size | |
| font, fs, ypos = "Helvetica", 44, fh - 64 | |
| all_clips = [final_video] | |
| for line in linelevel_subs: | |
| # ─ Static full‑line text | |
| txt = TextClip( | |
| line["word"], | |
| font=font, | |
| fontsize=fs, | |
| color="white", | |
| stroke_color="black", | |
| stroke_width=1 | |
| ) | |
| x0 = (fw - txt.w) / 2 | |
| static = ( | |
| txt | |
| .set_start(line["start"]) | |
| .set_duration(line["end"] - line["start"]) | |
| .set_position((x0, ypos)) | |
| ) | |
| all_clips.append(static) | |
| # ─ Word‑by‑word highlight | |
| cursor = x0 | |
| for wd in line["textcontents"]: | |
| wc = TextClip( | |
| wd["word"], | |
| font=font, | |
| fontsize=fs, | |
| color="yellow", | |
| stroke_color="black", | |
| stroke_width=1 | |
| ) | |
| hl = ( | |
| wc | |
| .set_start(wd["start"]) | |
| .set_duration(wd["end"] - wd["start"]) | |
| .set_position((cursor, ypos)) | |
| ) | |
| all_clips.append(hl) | |
| # advance cursor by measuring a space after the word | |
| dummy = TextClip(wd["word"] + " ", font=font, fontsize=fs) | |
| cursor += dummy.w | |
| # 8️⃣ Composite all clips and export | |
| subtitled = CompositeVideoClip(all_clips, size=(fw, fh)) \ | |
| .set_audio(final_video.audio) | |
| output_path = "final_with_subtitles.mp4" | |
| subtitled.write_videofile( | |
| output_path, | |
| fps=24, | |
| codec="libx264", | |
| audio_codec="aac", | |
| preset="ultrafast" | |
| ) | |
| # Return TTS audio path, final video path, and the script | |
| return narration_file, output_path, script | |
| iface = gr.Interface( | |
| fn=generate_video, | |
| inputs=[ | |
| gr.Textbox(label="Enter Text Prompt", placeholder="Enter the text to generate the video script."), | |
| gr.Slider(minimum=1, maximum=30, step=1, label="Video Length (seconds)", value=10), | |
| gr.Radio(choices=["portrait", "landscape", "square"], label="Select Aspect Ratio", value="landscape"), | |
| ], | |
| outputs=[ | |
| gr.Audio(label="Narration Audio"), | |
| gr.Video(label="Generated Video"), | |
| gr.Textbox(label="Generated Script", interactive=False) | |
| ], | |
| title="Sepia Text-to-Video Generator", | |
| description="Enter a text prompt, specify the length of the video (maximum 30 seconds), select the aspect ratio, and click 'Submit' to get the narrated audio, the video and the script.", | |
| live=False | |
| ) | |
| iface.launch(debug=True) |