Spaces:

CereusTech
/

Sepia_Text_to_Video

Running

App Files Files Community

Afeezee committed on Apr 21, 2025

Commit

83aa67f

verified ·

1 Parent(s): 34e04a5

Create app.py

Browse files

Files changed (1) hide show

app.py +291 -0

app.py ADDED Viewed

	@@ -0,0 +1,291 @@

import os

import assemblyai as aai
import gradio as gr
import requests
from cerebras.cloud.sdk import Cerebras
from gtts import gTTS
from moviepy import (
    AudioFileClip,
    CompositeVideoClip,
    TextClip,
    VideoFileClip,
    concatenate_videoclips,
)

# --- Service credentials ---------------------------------------------------
# Each key is read from the process environment; os.getenv yields None for a
# variable that is not set.

# Cerebras: key from "Ckey", handed to the SDK client.
Cerekey = os.getenv("Ckey")
client = Cerebras(api_key=Cerekey)

# Pexels: key from "Pkey", exposed under the constant name used for requests.
pexkey = os.getenv("Pkey")
PEXELS_API_KEY = pexkey

# AssemblyAI: key from "Akey", installed into the SDK's global settings.
asskey = os.getenv("Akey")
aai.settings.api_key = asskey
def generate_script(prompt, max_duration):
    """Ask the Cerebras chat model for a short narration of ``prompt``.

    The system message instructs the model to write a concise, poetic
    narration that can be read in less than ``max_duration`` seconds.

    Returns the message content of the first completion choice.
    """
    instructions = f"You are an expert video content creator and narration writer who is proficient in generating narration from user prompts and crafting a concise and poetic narration that aligns with the prompt. Craft a concise, poetic narration for the prompt. Go straight to the narration, don't write a foreward or a description of your action. The narration should be suitable for a video that can be read in less than {max_duration} seconds."
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt},
    ]
    # stream=False: the call is made in non-streaming mode and the result is
    # read as a single response object below.
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama-3.3-70b",
        stream=False,
        max_completion_tokens=1024,
        temperature=0.7,
        top_p=1,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def search_and_download_videos(query, max_duration, aspect_ratio, download_folder, max_results=6):
    """Search Pexels for ``query`` and download the clips that fit.

    A search result is downloaded when its duration is at most
    ``max_duration`` and its shape ("landscape", "portrait" or "square",
    derived from width vs. height) equals ``aspect_ratio``. Results missing
    a duration, dimensions or downloadable files are skipped.

    Args:
        query: Search text sent to the Pexels video search endpoint.
        max_duration: Longest clip duration to accept.
        aspect_ratio: One of "landscape", "portrait" or "square".
        download_folder: Directory for the downloads; created if missing.
        max_results: Number of search results requested (``per_page``).

    Returns:
        Paths of the files that were downloaded successfully. The list is
        empty when the search request itself fails.
    """
    url = "https://api.pexels.com/videos/search"
    headers = {"Authorization": PEXELS_API_KEY}
    params = {"query": query, "per_page": max_results}
    timeout = 30  # seconds; without it a stalled connection blocks forever

    try:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        response.raise_for_status()
        videos = response.json().get("videos", [])
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return []

    os.makedirs(download_folder, exist_ok=True)

    downloaded_files = []
    for video in videos:
        duration = video.get("duration")
        width = video.get("width")
        height = video.get("height")
        video_files = video.get("video_files") or []
        # Incomplete metadata cannot be filtered or downloaded; skip it
        # instead of failing on a None comparison or an empty file list.
        if not (width and height) or duration is None or not video_files:
            continue

        if width > height:
            video_aspect_ratio = "landscape"
        elif height > width:
            video_aspect_ratio = "portrait"
        else:
            video_aspect_ratio = "square"
        if duration > max_duration or video_aspect_ratio != aspect_ratio:
            continue

        video_url = video_files[0]["link"]
        video_filename = os.path.join(download_folder, f"{video['id']}.mp4")
        try:
            with requests.get(video_url, stream=True, timeout=timeout) as video_response:
                # Without this check an HTTP error body would be saved as .mp4.
                video_response.raise_for_status()
                with open(video_filename, "wb") as file:
                    for chunk in video_response.iter_content(chunk_size=64 * 1024):
                        file.write(chunk)
        except (requests.exceptions.RequestException, OSError) as e:
            # One bad download must not discard the clips already fetched.
            print(f"Error: {e}")
            if os.path.exists(video_filename):
                os.remove(video_filename)  # drop the partial file
            continue
        downloaded_files.append(video_filename)
    return downloaded_files
def generate_narration(script, output_file="narration.mp3"):
    """Synthesize ``script`` as English speech with gTTS and save it.

    Returns ``output_file``, the path the audio was saved to.
    """
    speech = gTTS(script, lang="en")
    speech.save(output_file)
    return output_file
def load_videos_from_folder(folder_path):
    """List the video files directly inside ``folder_path``.

    A file counts as a video when its name ends in .mp4, .mov, .avi or .mkv,
    compared case-insensitively. Sub-directories are ignored even if their
    name carries one of those extensions.

    Returns:
        The matching paths (``folder_path`` joined with each file name) in
        sorted order, or an empty list when ``folder_path`` is not an
        existing directory.
    """
    if not os.path.isdir(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return []
    video_extensions = ('.mp4', '.mov', '.avi', '.mkv')
    candidates = (
        os.path.join(folder_path, name)
        # os.listdir order is arbitrary; sort so the result is reproducible.
        for name in sorted(os.listdir(folder_path))
        if name.lower().endswith(video_extensions)
    )
    return [path for path in candidates if os.path.isfile(path)]
def aggregate_videos(clips):
    """Join ``clips`` end to end using the "compose" concatenation method.

    Returns None when ``clips`` is empty or otherwise falsy.
    """
    return concatenate_videoclips(clips, method="compose") if clips else None
def trim_video_to_audio_length(final_video, audio_length):
    """Cut ``final_video`` down to ``audio_length`` when it runs longer.

    A video that is not longer than ``audio_length`` is returned unchanged;
    otherwise the sub-clip from 0 to ``audio_length`` is returned.
    """
    needs_trim = final_video.duration > audio_length
    if not needs_trim:
        return final_video
    return final_video.subclipped(0, audio_length)
def add_narration_to_video(final_video, narration_path):
    """Attach the narration audio file to ``final_video``.

    If ``narration_path`` does not exist the video is returned unchanged.
    Narration longer than the video is shortened to the video's duration;
    shorter narration keeps its own length, because a clip's duration must
    not be set beyond the audio that is actually in the file.

    Returns the video with the narration as its audio track.
    """
    if not os.path.exists(narration_path):
        return final_video
    narration_audio = AudioFileClip(narration_path)
    if narration_audio.duration > final_video.duration:
        # Only ever shorten the audio; never stretch it past its real length.
        narration_audio = narration_audio.with_duration(final_video.duration)
    return final_video.with_audio(narration_audio)
def save_final_video(final_video, output_path):
    """Encode ``final_video`` to ``output_path`` (libx264 video, aac audio, ultrafast preset)."""
    encoding = {
        "codec": "libx264",
        "audio_codec": "aac",
        "preset": "ultrafast",
    }
    final_video.write_videofile(output_path, **encoding)
def _make_subtitle_line(words):
    """Build one subtitle entry from a non-empty list of word dicts."""
    return {
        "word": " ".join(w['word'] for w in words),
        "start": words[0]['start'],
        "end": words[-1]['end'],
        "textcontents": words,
    }


def split_text_into_lines(data):
    """Group word-level timings into subtitle lines.

    A new line is started when adding the next word would push the line past
    40 characters (spaces not counted), when the line already spans more
    than 2.5 seconds, or when the pause before the next word exceeds 1.5
    seconds. A single word longer than 40 characters gets a line of its own.

    Args:
        data: Sequence of dicts with "word", "start" and "end" keys, where
            "start"/"end" are numbers on a common time scale.

    Returns:
        A list of dicts with "word" (the joined line text), "start", "end"
        and "textcontents" (the word dicts making up the line).
    """
    max_chars = 40
    max_duration = 2.5
    max_gap = 1.5

    subtitles = []
    line = []
    line_chars = 0
    line_duration = 0
    last_index = len(data) - 1

    for idx, wd in enumerate(data):
        too_long = line_chars + len(wd['word']) > max_chars
        too_slow = line_duration > max_duration
        # Only flush a line that has words: an over-long first word would
        # otherwise index into an empty list.
        if line and (too_long or too_slow):
            subtitles.append(_make_subtitle_line(line))
            line = []
            line_chars = 0
            line_duration = 0

        line.append(wd)
        line_chars += len(wd['word'])
        line_duration = wd['end'] - line[0]['start']

        # A long pause before the next word also ends the line.
        if idx < last_index and data[idx + 1]['start'] - wd['end'] > max_gap:
            subtitles.append(_make_subtitle_line(line))
            line = []
            line_chars = 0
            line_duration = 0

    if line:
        subtitles.append(_make_subtitle_line(line))
    return subtitles
def _build_subtitle_clips(linelevel_subs, frame_width, frame_height):
    """Create the subtitle TextClips (white lines plus yellow per-word highlights)."""
    # NOTE(review): MoviePy 2 appears to resolve ``font`` through Pillow, so
    # "Helvetica" must be a font available on the host (or be replaced by a
    # font file path) - confirm on the deployment image.
    font, fs = "Helvetica", 44
    ypos = frame_height - 64
    clips = []
    for line in linelevel_subs:
        # Static full-line text, horizontally centred.
        txt = TextClip(
            text=line["word"],
            font=font,
            font_size=fs,
            color="white",
            method='label',
            stroke_color="black",
            stroke_width=1,
        )
        x0 = (frame_width - txt.w) / 2
        clips.append(
            txt
            .with_start(line["start"])
            .with_duration(line["end"] - line["start"])
            .with_position((x0, ypos))
        )
        # Word-by-word highlight drawn on top of the static line.
        cursor = x0
        for wd in line["textcontents"]:
            wc = TextClip(
                text=wd["word"],
                font=font,
                font_size=fs,
                color="yellow",
                method='label',
                stroke_color="black",
                stroke_width=1,
            )
            clips.append(
                wc
                .with_start(wd["start"])
                .with_duration(wd["end"] - wd["start"])
                .with_position((cursor, ypos))
            )
            # Advance the cursor by the rendered width of the word plus a space.
            dummy = TextClip(text=wd["word"] + " ", font=font, font_size=fs, method='label')
            cursor += dummy.w
    return clips


def generate_video(
    prompt: str,
    max_duration: int,
    aspect_ratio: str,
    download_folder: str = "downloaded_videos",
    max_results: int = 6
):
    """Build a narrated, subtitled video for ``prompt``.

    Steps: generate a narration script, download matching Pexels clips,
    concatenate them, synthesize the narration, trim the video to the
    narration length, transcribe the narration for word timings, overlay
    subtitles and write "final_with_subtitles.mp4".

    Args:
        prompt: Text used both for the script and as the video search query.
        max_duration: Target narration length and maximum clip duration, in seconds.
        aspect_ratio: "portrait", "landscape" or "square".
        download_folder: Directory the source clips are downloaded to.
        max_results: Number of search results requested.

    Returns:
        ``(narration_path, video_path, script)``. When no video could be
        produced, a Gradio warning is shown and ``(None, None, script)`` is
        returned, so that no message string reaches the audio output.
    """
    # 1. Generate the narration script
    script = generate_script(prompt, max_duration)

    # 2. Search & download Pexels videos
    videos = search_and_download_videos(
        prompt, max_duration, aspect_ratio, download_folder, max_results
    )
    if not videos:
        gr.Warning("No videos were downloaded.")
        return None, None, script

    # 3. Load and concatenate downloaded clips
    video_clips = [VideoFileClip(path) for path in videos]
    try:
        final_video = aggregate_videos(video_clips)
        if final_video is None:
            gr.Warning("Error generating video.")
            return None, None, script

        # 4. Generate TTS narration and attach audio
        narration_file = generate_narration(script)
        # Opened only to read the duration; close it again straight away.
        with AudioFileClip(narration_file) as probe:
            audio_len = probe.duration
        final_video = trim_video_to_audio_length(final_video, audio_len)
        final_video = add_narration_to_video(final_video, narration_file)

        # 5. Transcribe narration for word-level timings (ms -> seconds)
        transcript = aai.Transcriber().transcribe(narration_file)
        # ``words`` is treated as optional: fall back to no subtitles if absent.
        wordlevel_info = [
            {
                "word": w.text,
                "start": w.start / 1000.0,
                "end": w.end / 1000.0,
            }
            for w in (transcript.words or [])
        ]

        # 6. Split word timestamps into line-level subtitles
        linelevel_subs = split_text_into_lines(wordlevel_info)

        # 7. Build subtitle clips (static + highlights)
        fw, fh = final_video.size
        all_clips = [final_video] + _build_subtitle_clips(linelevel_subs, fw, fh)

        # 8. Composite all clips and export
        subtitled = CompositeVideoClip(all_clips, size=(fw, fh)).with_audio(final_video.audio)
        output_path = "final_with_subtitles.mp4"
        subtitled.write_videofile(
            output_path,
            fps=24,
            codec="libx264",
            audio_codec="aac",
            preset="ultrafast"
        )
    finally:
        # Release the readers held open on the source files.
        for clip in video_clips:
            clip.close()

    # Return TTS audio path, final video path, and the script
    return narration_file, output_path, script
# Gradio UI: three inputs (prompt, length, aspect ratio) feed generate_video,
# whose three results fill the audio, video and script outputs.
_inputs = [
    gr.Textbox(label="Enter Text Prompt", placeholder="Enter the text to generate the video script."),
    gr.Slider(minimum=1, maximum=30, step=1, label="Video Length (seconds)", value=10),
    gr.Radio(choices=["portrait", "landscape", "square"], label="Select Aspect Ratio", value="landscape"),
]
_outputs = [
    gr.Audio(label="Narration Audio"),
    gr.Video(label="Generated Video"),
    gr.Textbox(label="Generated Script", interactive=False),
]

iface = gr.Interface(
    fn=generate_video,
    inputs=_inputs,
    outputs=_outputs,
    title="Sepia Text-to-Video Generator",
    description="Enter a text prompt, specify the length of the video (maximum 30 seconds), select the aspect ratio, and click 'Submit' to get the narrated audio, the video and the script.",
    live=False,
)

# Start the app as soon as the module runs, with Gradio's debug mode on.
iface.launch(debug=True)