Shreevathsam committed on
Commit
5e56d63
·
verified ·
1 Parent(s): 9b6da36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -320
app.py CHANGED
@@ -5,25 +5,22 @@ import whisper
5
  import shutil
6
  import wave
7
  import base64
8
- from moviepy.editor import (VideoFileClip, AudioFileClip, TextClip,
9
- concatenate_videoclips, CompositeVideoClip, CompositeAudioClip, ImageClip)
10
  import moviepy.audio.fx.all as afx
11
  import moviepy.video.fx.all as vfx
12
  import gradio as gr
13
- from PIL import Image, ImageDraw, ImageFilter, ImageFont
14
  import numpy as np
15
- from functools import lru_cache
16
  import urllib.request
17
  from google import genai
18
  from google.genai import types
19
 
20
- # Create necessary directories
21
  os.makedirs('video_clips', exist_ok=True)
22
  os.makedirs('background_music', exist_ok=True)
23
  os.makedirs('voice_over', exist_ok=True)
24
  os.makedirs('exports', exist_ok=True)
25
 
26
- # Get API key from environment variable (will be set in Hugging Face Space settings)
27
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
28
  if GOOGLE_API_KEY:
29
  os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
@@ -77,7 +74,7 @@ def generate_tts_audio(text_input, voice_name="Puck"):
77
  return None, f"Error: {str(e)}"
78
 
79
  def split_text_into_lines(data):
80
- MaxChars, MaxDuration, MaxGap = 60, 2.5, 1.5
81
  subtitles, line, line_duration = [], [], 0
82
  for idx, word_data in enumerate(data):
83
  line.append(word_data)
@@ -104,188 +101,100 @@ def split_text_into_lines(data):
104
  })
105
  return subtitles
106
 
107
- @lru_cache(maxsize=1000)
108
- def get_cached_text_clip(text, font, fontsize, color):
109
- return TextClip(text, font=font, fontsize=fontsize, color=color)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  def create_title_overlay(title_text, framesize, duration=4):
112
  if not title_text or not title_text.strip():
113
  return []
114
  frame_width, frame_height = framesize
115
- FONT_URL = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
116
- FONT_PATH = "/tmp/Poppins-Bold.ttf"
117
- if not os.path.exists(FONT_PATH):
118
- try:
 
119
  urllib.request.urlretrieve(FONT_URL, FONT_PATH)
120
- except:
121
- FONT_PATH = None
122
- TOP_MARGIN = int(frame_height * 0.115)
123
- FONT_SIZE = int(frame_height * 0.042)
124
- STROKE_WIDTH = max(1, int(frame_height * 0.003))
125
- LINE_SPACING = max(4, int(frame_height * 0.008))
126
- def load_font(size):
127
- try:
128
- if FONT_PATH and os.path.exists(FONT_PATH):
129
- return ImageFont.truetype(FONT_PATH, size)
130
- return ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", size)
131
- except:
132
- return ImageFont.load_default()
133
- font_obj = load_font(FONT_SIZE)
134
  base = Image.new("RGBA", (frame_width, frame_height), (0, 0, 0, 0))
135
- temp_img = Image.new("RGBA", (frame_width, frame_height), (0,0,0,0))
136
- temp_draw = ImageDraw.Draw(temp_img)
137
- def measure_text(text, font):
138
- try:
139
- bbox = temp_draw.textbbox((0,0), text, font=font, stroke_width=STROKE_WIDTH)
140
- return bbox[2]-bbox[0], bbox[3]-bbox[1]
141
- except:
142
- return 100, 50
143
- def wrap_text(text, font, max_width):
144
- words = text.upper().split()
145
- lines, current = [], []
146
- for word in words:
147
- test_line = " ".join(current + [word])
148
- w, _ = measure_text(test_line, font)
149
- if w <= max_width:
150
- current.append(word)
151
- else:
152
- if current:
153
- lines.append(" ".join(current))
154
- current = [word]
155
- else:
156
- lines.append(word)
157
- current = []
158
- if current:
159
- lines.append(" ".join(current))
160
- return lines[:4]
161
- lines = wrap_text(title_text, font_obj, frame_width * 0.90)
162
- line_heights = [measure_text(line, font_obj)[1] for line in lines]
163
- y_start = TOP_MARGIN
164
- x_center = frame_width // 2
165
  draw = ImageDraw.Draw(base)
166
- y = y_start
167
- for i, line in enumerate(lines):
168
- w, h = measure_text(line, font_obj)
169
- x = x_center - w // 2
170
- draw.text((x+2, y+2), line, font=font_obj, fill=(0,0,0,180))
171
- draw.text((x, y), line, font=font_obj, fill=(255,255,255,255), stroke_width=STROKE_WIDTH, stroke_fill=(0,0,0,255))
172
- y += line_heights[i] + LINE_SPACING
 
 
 
 
 
 
173
  return [ImageClip(np.array(base), duration=duration)]
174
 
175
- def create_caption(textJSON, framesize, font="Helvetica-Bold", fontsize=14, color='white'):
176
- full_duration = textJSON['end'] - textJSON['start']
177
- word_clips = []
178
- xy_textclips_positions = []
179
- frame_width, frame_height = framesize
180
- max_line_width = frame_width * 0.8
181
- lines, current_line, current_line_width = [], [], 0
182
- for wordJSON in textJSON['textcontents']:
183
- word_upper = wordJSON['word'].upper()
184
- temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
185
- temp_space = get_cached_text_clip(" ", font, fontsize, color)
186
- word_width, word_height = temp_word.size
187
- space_width, _ = temp_space.size
188
- if current_line_width + word_width + space_width > max_line_width and current_line:
189
- lines.append({'words': current_line.copy(), 'width': current_line_width, 'height': word_height})
190
- current_line = [wordJSON]
191
- current_line_width = word_width + space_width
192
- else:
193
- current_line.append(wordJSON)
194
- current_line_width += word_width + space_width
195
- if current_line:
196
- word_upper = current_line[0]['word'].upper()
197
- temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
198
- _, word_height = temp_word.size
199
- lines.append({'words': current_line, 'width': current_line_width, 'height': word_height})
200
- total_text_height = sum(line['height'] for line in lines) + (len(lines) - 1) * 3
201
- subtitle_y_position = int(frame_height * 0.65)
202
- current_y = subtitle_y_position
203
- if lines:
204
- shadow_padding = 25
205
- shadow_height_extra = 15
206
- total_subtitle_width = max(line['width'] for line in lines)
207
- bg_width = int(total_subtitle_width + shadow_padding * 2)
208
- bg_height = int(total_text_height + shadow_height_extra * 2)
209
- img = Image.new('RGBA', (bg_width, bg_height), (0, 0, 0, 0))
210
- draw = ImageDraw.Draw(img)
211
- draw.rounded_rectangle([(0, 0), (bg_width-1, bg_height-1)], radius=15, fill=(0, 0, 0, 128))
212
- img_array = np.array(img)
213
- shadow_bg = ImageClip(img_array, duration=full_duration).set_start(textJSON['start'])
214
- shadow_x = (frame_width - total_subtitle_width) / 2 - shadow_padding
215
- shadow_y = subtitle_y_position - shadow_height_extra
216
- shadow_bg = shadow_bg.set_position((shadow_x, shadow_y))
217
- word_clips.append(shadow_bg)
218
- for line in lines:
219
- line_words = line['words']
220
- word_dimensions = []
221
- for wordJSON in line_words:
222
- word_upper = wordJSON['word'].upper()
223
- temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
224
- temp_space = get_cached_text_clip(" ", font, fontsize, color)
225
- word_width, word_height = temp_word.size
226
- space_width, _ = temp_space.size
227
- word_dimensions.append({
228
- 'word_data': wordJSON,
229
- 'word_width': word_width,
230
- 'word_height': word_height,
231
- 'space_width': space_width,
232
- 'word_upper': word_upper
233
- })
234
- line_start_x = (frame_width - line['width']) / 2
235
- current_x = line_start_x
236
- for word_dim in word_dimensions:
237
- wordJSON = word_dim['word_data']
238
- word_width = word_dim['word_width']
239
- word_height = word_dim['word_height']
240
- space_width = word_dim['space_width']
241
- word_upper = word_dim['word_upper']
242
- shadow_text = get_cached_text_clip(word_upper, font, fontsize, 'black')
243
- shadow_text = shadow_text.set_start(textJSON['start']).set_duration(full_duration)
244
- shadow_text = shadow_text.set_position((current_x + 1, current_y + 1)).set_opacity(0.3)
245
- word_clips.append(shadow_text)
246
- word_clip = get_cached_text_clip(word_upper, font, fontsize, color)
247
- word_clip = word_clip.set_start(textJSON['start']).set_duration(full_duration)
248
- word_clip = word_clip.set_position((current_x, current_y))
249
- space_clip = get_cached_text_clip(" ", font, fontsize, color)
250
- space_clip = space_clip.set_start(textJSON['start']).set_duration(full_duration)
251
- space_clip = space_clip.set_position((current_x + word_width, current_y))
252
- xy_textclips_positions.append({
253
- "x_pos": current_x,
254
- "y_pos": current_y,
255
- "width": word_width,
256
- "height": word_height,
257
- "word": word_upper,
258
- "start": wordJSON['start'],
259
- "end": wordJSON['end'],
260
- "duration": wordJSON['end'] - wordJSON['start']
261
- })
262
- word_clips.append(word_clip)
263
- word_clips.append(space_clip)
264
- current_x += word_width + space_width
265
- current_y += line['height'] + 3
266
- for highlight_word in xy_textclips_positions:
267
- bg_width = int(highlight_word['width'] + 16)
268
- bg_height = int(highlight_word['height'] + 8)
269
- img = Image.new('RGBA', (bg_width, bg_height), (0, 0, 0, 0))
270
- draw = ImageDraw.Draw(img)
271
- draw.rounded_rectangle([(0, 0), (bg_width-1, bg_height-1)], radius=8, fill=(147, 0, 211, 180))
272
- img_array = np.array(img)
273
- bg_clip = ImageClip(img_array, duration=highlight_word['duration'])
274
- bg_clip = bg_clip.set_start(highlight_word['start'])
275
- bg_x = highlight_word['x_pos'] - 8
276
- bg_y = highlight_word['y_pos'] - 4
277
- bg_clip = bg_clip.set_position((bg_x, bg_y))
278
- shadow_highlight = get_cached_text_clip(highlight_word['word'], font, fontsize, 'black')
279
- shadow_highlight = shadow_highlight.set_start(highlight_word['start']).set_duration(highlight_word['duration'])
280
- shadow_highlight = shadow_highlight.set_position((highlight_word['x_pos'] + 1, highlight_word['y_pos'] + 1)).set_opacity(0.4)
281
- word_clip_highlight = get_cached_text_clip(highlight_word['word'], font, fontsize, 'white')
282
- word_clip_highlight = word_clip_highlight.set_start(highlight_word['start']).set_duration(highlight_word['duration'])
283
- word_clip_highlight = word_clip_highlight.set_position((highlight_word['x_pos'], highlight_word['y_pos']))
284
- word_clips.append(bg_clip)
285
- word_clips.append(shadow_highlight)
286
- word_clips.append(word_clip_highlight)
287
- return word_clips
288
-
289
  def get_random_subclip_and_slow(clip):
290
  subclip_durations = [2, 3, 4]
291
  subclip_duration = random.choice(subclip_durations)
@@ -369,41 +278,37 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
369
  generation_cancelled = False
370
  current_video_clip = None
371
  progress(0, desc="Starting...")
372
- if generation_cancelled:
373
- return None, "Generation cancelled"
374
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
375
 
376
- # Updated paths for Hugging Face
377
  source_path = 'video_clips'
378
  if not os.path.isdir(source_path):
379
- return None, "Video clips folder not found. Please upload video clips to the 'video_clips' folder."
 
380
  output_path = 'exports'
381
  os.makedirs(output_path, exist_ok=True)
382
 
383
  video_extensions = ('.mp4', '.avi', '.mkv', '.mov')
384
  all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
385
  if not all_files:
386
- return None, "No video files found in 'video_clips' folder"
 
387
  random.shuffle(all_files)
388
- if generation_cancelled:
389
- return None, "Generation cancelled"
390
  bg_music_path = None
391
  bg_music_folder_path = 'background_music'
392
  if os.path.isdir(bg_music_folder_path):
393
- audio_extensions = ('.mp3', '.wav', '.m4a', '.aac')
394
- possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions) and not f.startswith('voiceover_')]
395
- if len(possible_files) >= 1:
396
  bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
397
- target_duration_seconds = 0
398
- voice_over_audio = None
399
- linelevel_subtitles = None
400
  voice_over_path = None
 
 
401
  if text_input and text_input.strip():
402
  progress(0.1, desc="Generating TTS...")
403
  voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
404
  tts_path, tts_message = generate_tts_audio(text_input, voice_name)
405
- if generation_cancelled:
406
- return None, "Generation cancelled"
407
  if tts_path:
408
  voice_over_folder_path = 'voice_over'
409
  os.makedirs(voice_over_folder_path, exist_ok=True)
@@ -414,39 +319,33 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
414
  else:
415
  return None, f"TTS failed: {tts_message}"
416
  elif audio_input:
417
- if generation_cancelled:
418
- return None, "Generation cancelled"
419
  voice_over_folder_path = 'voice_over'
420
  os.makedirs(voice_over_folder_path, exist_ok=True)
421
  voice_filename = f"uploaded_voiceover_{timestamp}.mp3"
422
  saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
423
  shutil.copy2(audio_input, saved_voice_path)
424
  voice_over_path = saved_voice_path
 
425
  if voice_over_path:
426
  try:
427
  progress(0.2, desc="Processing audio...")
428
- if generation_cancelled:
429
- return None, "Generation cancelled"
430
  voice_over_audio = AudioFileClip(voice_over_path)
431
  target_duration_seconds = voice_over_audio.duration
432
  linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
433
- if generation_cancelled:
434
- voice_over_audio.close()
435
- return None, "Generation cancelled"
436
  except Exception as e:
437
  return None, f"Audio error: {str(e)}"
438
  else:
439
  if not bg_music_path:
440
  return None, "Need text/audio or background music"
441
  target_duration_seconds = duration_minutes * 60
 
 
442
  progress(0.3, desc="Preparing audio...")
443
- if generation_cancelled:
444
- if voice_over_audio:
445
- voice_over_audio.close()
446
- return None, "Generation cancelled"
447
  audio_tracks = []
448
  if voice_over_audio:
449
  audio_tracks.append(voice_over_audio)
 
450
  if bg_music_path:
451
  try:
452
  background_audio = AudioFileClip(bg_music_path)
@@ -455,45 +354,35 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
455
  audio_tracks.append(background_audio)
456
  except Exception as e:
457
  print(f"Background music error: {e}")
 
458
  final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
 
459
  progress(0.4, desc="Setting up video...")
460
- if generation_cancelled:
461
- cleanup_resources()
462
- return None, "Generation cancelled"
463
  if video_quality == "High":
464
  target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
465
  elif video_quality == "Standard":
466
  target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
467
  else:
468
  target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
 
469
  progress(0.5, desc="Processing clips...")
 
470
  video_clips = []
471
  current_duration = 0
472
  file_index = 0
473
- safety_counter = 0
474
- max_iterations = len(all_files) * 3
475
- while current_duration < target_duration_seconds and safety_counter < max_iterations:
476
- if generation_cancelled:
477
- for clip in video_clips:
478
- try:
479
- clip.close()
480
- except:
481
- pass
482
- cleanup_resources()
483
- return None, "Generation cancelled"
484
  if file_index >= len(all_files):
485
  file_index = 0
486
  random.shuffle(all_files)
 
487
  video_file = all_files[file_index]
488
  file_index += 1
489
- safety_counter += 1
490
  try:
491
  full_clip = VideoFileClip(os.path.join(source_path, video_file))
492
- current_video_clip = full_clip
493
- if generation_cancelled:
494
- full_clip.close()
495
- cleanup_resources()
496
- return None, "Generation cancelled"
497
  if full_clip.h != target_height:
498
  aspect_ratio = full_clip.w / full_clip.h
499
  new_width = int(target_height * aspect_ratio)
@@ -503,40 +392,37 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
503
  full_clip = full_clip.resize((new_width, adjusted_height))
504
  else:
505
  full_clip = ensure_even_dimensions(full_clip)
 
506
  subclip = get_random_subclip_and_slow(full_clip)
507
  remaining_duration = target_duration_seconds - current_duration
 
508
  if subclip.duration > remaining_duration:
509
  subclip = subclip.subclip(0, remaining_duration)
 
510
  video_clips.append(ensure_even_dimensions(subclip))
511
  current_duration += subclip.duration
512
- progress(0.5 + (safety_counter * 0.1 / max_iterations), desc=f"Clip {len(video_clips)}")
513
  except Exception as e:
514
- print(f"Error: {e}")
515
  continue
516
- if generation_cancelled:
517
- for clip in video_clips:
518
- try:
519
- clip.close()
520
- except:
521
- pass
522
- cleanup_resources()
523
- return None, "Generation cancelled"
524
  if not video_clips:
525
  return None, "No clips processed"
526
 
 
527
  total_video_duration = sum(clip.duration for clip in video_clips)
528
  duration_diff = total_video_duration - target_duration_seconds
 
529
  if abs(duration_diff) > 0.1:
530
  if duration_diff > 0:
531
  trim_amount = duration_diff
532
- new_last_clip = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
533
- video_clips[-1] = new_last_clip
534
  else:
535
  extend_amount = abs(duration_diff)
536
- new_last_clip = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
537
- video_clips[-1] = new_last_clip
538
  progress(0.6, desc="Applying transitions...")
539
  transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
 
540
  processed_clips = []
541
  for i in range(len(video_clips)):
542
  if i == 0:
@@ -551,67 +437,45 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
551
  else:
552
  _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
553
  processed_clips.append(clip_with_transition)
 
554
  progress(0.7, desc="Concatenating...")
555
- if generation_cancelled:
556
- for c in processed_clips:
557
- try:
558
- c.close()
559
- except:
560
- pass
561
- cleanup_resources()
562
- return None, "Generation cancelled"
563
  if transition_type == "Snap Cut":
564
  final_video_only = concatenate_videoclips(processed_clips, method="compose")
565
  else:
566
  final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
 
567
  final_video_only = ensure_even_dimensions(final_video_only)
568
- current_video_clip = final_video_only
569
- if final_audio:
570
- final_video_only = final_video_only.set_duration(final_audio.duration)
 
 
571
  progress(0.8, desc="Adding overlays...")
572
- if generation_cancelled:
573
- try:
574
- final_video_only.close()
575
- except:
576
- pass
577
- cleanup_resources()
578
- return None, "Generation cancelled"
579
- all_subtitle_clips = []
580
- if linelevel_subtitles:
581
- for line in linelevel_subtitles:
582
- if generation_cancelled:
583
- try:
584
- final_video_only.close()
585
- except:
586
- pass
587
- cleanup_resources()
588
- return None, "Generation cancelled"
589
- try:
590
- subtitle_fontsize = min(42, final_video_only.size[1] // 25)
591
- all_subtitle_clips.extend(create_caption(line, final_video_only.size, font="Helvetica-Bold", fontsize=subtitle_fontsize, color='white'))
592
- except Exception as e:
593
- print(f"Subtitle error: {e}")
594
- continue
595
  all_clips = [final_video_only.set_opacity(0.65)]
596
- if all_subtitle_clips:
597
- all_clips.extend(all_subtitle_clips)
 
 
 
 
 
598
  if title_text and title_text.strip():
599
  title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
600
  all_clips.extend(title_clips)
 
601
  final_video = CompositeVideoClip(all_clips)
602
- current_video_clip = final_video
603
  if final_audio:
604
  final_video = final_video.set_audio(final_audio)
 
605
  progress(0.9, desc="Exporting...")
606
- if generation_cancelled:
607
- try:
608
- final_video.close()
609
- except:
610
- pass
611
- cleanup_resources()
612
- return None, "Generation cancelled"
613
  output_filename = f'video_{timestamp}.mp4'
614
  final_output_path = os.path.join(output_path, output_filename)
 
615
  try:
616
  final_video.write_videofile(
617
  final_output_path,
@@ -622,65 +486,50 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
622
  bitrate=bitrate,
623
  audio_bitrate="128k",
624
  threads=8,
625
- ffmpeg_params=["-crf", crf, "-pix_fmt", "yuv420p", "-movflags", "+faststart", "-tune", "fastdecode"]
626
  )
627
  except Exception as e:
628
- if generation_cancelled:
629
- return None, "Generation cancelled"
630
  return None, f"Export error: {str(e)}"
 
631
  progress(1.0, desc="Done")
632
- if generation_cancelled:
633
- try:
634
- if os.path.exists(final_output_path):
635
- os.remove(final_output_path)
636
- except:
637
- pass
638
- cleanup_resources()
639
- return None, "Generation cancelled"
640
  try:
641
  final_video.close()
642
  if voice_over_audio:
643
  voice_over_audio.close()
644
- current_video_clip = None
645
  except:
646
  pass
647
- audio_source = ""
648
- if text_input and text_input.strip():
649
- audio_source = f"TTS ({AVAILABLE_VOICES[voice_selection]['name'] if voice_selection in AVAILABLE_VOICES else 'Puck'})"
650
- elif voice_over_path:
651
- audio_source = "Uploaded Audio"
652
- else:
653
- audio_source = "Background Music"
654
- summary = f"Complete\n{output_filename}\n{audio_source}\n{transition_type}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subtitles"
655
  return final_output_path, summary
656
 
657
  with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
658
  gr.Markdown("# 🎬 AI Video Generator")
659
- gr.Markdown("Upload video clips to `video_clips` folder and optionally background music to `background_music` folder.")
660
 
661
  with gr.Row():
662
  with gr.Column():
663
- text_input = gr.Textbox(label="Text for TTS", lines=4, placeholder="Enter text to convert to speech...")
664
  voice_dropdown = gr.Dropdown(
665
  choices=[(f"{v['name']} - {v['description']}", k) for k, v in AVAILABLE_VOICES.items()],
666
  value="Puck",
667
- label="Voice Selection"
668
  )
669
- audio_input = gr.Audio(type="filepath", label="Or Upload Audio File")
670
- title_input = gr.Textbox(label="Video Title (Optional)", lines=2, placeholder="Enter video title...")
671
- duration_slider = gr.Slider(0.5, 10, 2, 0.5, label="Duration (minutes) - only used if no audio")
672
- quality_radio = gr.Radio(["High", "Standard", "Preview"], value="High", label="Video Quality")
673
  transition_radio = gr.Radio(
674
  ["Smooth Blend", "Ken Burns Zoom", "Whip Pan", "Dreamy Fade", "Snap Cut"],
675
  value="Smooth Blend",
676
- label="Transition Effect"
677
  )
678
  with gr.Row():
679
- submit_btn = gr.Button("🎥 Generate Video", variant="primary", size="lg")
680
- stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg")
681
 
682
  with gr.Column():
683
- video_output = gr.Video(label="Generated Video")
684
  summary_output = gr.Textbox(label="Status", lines=8)
685
 
686
  submit_btn.click(
@@ -691,8 +540,4 @@ with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
691
  stop_btn.click(fn=cancel_generation, outputs=[summary_output, video_output])
692
 
693
  if __name__ == "__main__":
694
- interface.launch(
695
- server_name="0.0.0.0",
696
- server_port=7860,
697
- show_error=True
698
- )
 
5
  import shutil
6
  import wave
7
  import base64
8
+ from moviepy.editor import (VideoFileClip, AudioFileClip, concatenate_videoclips,
9
+ CompositeVideoClip, CompositeAudioClip, ImageClip)
10
  import moviepy.audio.fx.all as afx
11
  import moviepy.video.fx.all as vfx
12
  import gradio as gr
13
+ from PIL import Image, ImageDraw, ImageFont
14
  import numpy as np
 
15
  import urllib.request
16
  from google import genai
17
  from google.genai import types
18
 
 
19
  os.makedirs('video_clips', exist_ok=True)
20
  os.makedirs('background_music', exist_ok=True)
21
  os.makedirs('voice_over', exist_ok=True)
22
  os.makedirs('exports', exist_ok=True)
23
 
 
24
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
25
  if GOOGLE_API_KEY:
26
  os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
 
74
  return None, f"Error: {str(e)}"
75
 
76
  def split_text_into_lines(data):
77
+ MaxChars, MaxDuration, MaxGap = 40, 2.5, 1.5
78
  subtitles, line, line_duration = [], [], 0
79
  for idx, word_data in enumerate(data):
80
  line.append(word_data)
 
101
  })
102
  return subtitles
103
 
104
def create_subtitle_image(text, frame_size, fontsize=42):
    """Render one subtitle line onto a transparent, frame-sized RGBA image.

    Drawing with PIL is more reliable than MoviePy's TextClip (no
    ImageMagick dependency).

    Args:
        text: Subtitle text; rendered in upper case.
        frame_size: (width, height) of the video frame in pixels.
        fontsize: Font size in pixels.

    Returns:
        numpy uint8 array of shape (height, width, 4): the text drawn on a
        rounded, semi-transparent box near the bottom center of the frame.
    """
    frame_width, frame_height = frame_size

    # Load the Poppins font, downloading it once to /tmp; fall back to
    # PIL's built-in font if the download or load fails (e.g. no network).
    try:
        font_url = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
        font_path = "/tmp/Poppins-Bold.ttf"
        if not os.path.exists(font_path):
            urllib.request.urlretrieve(font_url, font_path)
        font = ImageFont.truetype(font_path, fontsize)
    except Exception:  # narrowed from bare except: don't swallow KeyboardInterrupt
        font = ImageFont.load_default()

    # Compute the display form once instead of calling .upper() per draw call.
    display_text = text.upper()

    # Create transparent image covering the whole frame.
    img = Image.new('RGBA', (frame_width, frame_height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    # Measure the rendered text.
    bbox = draw.textbbox((0, 0), display_text, font=font)
    text_width = bbox[2] - bbox[0]
    text_height = bbox[3] - bbox[1]

    # Position at bottom center (75% of frame height).
    x = (frame_width - text_width) // 2
    y = int(frame_height * 0.75)

    # Semi-transparent rounded background behind the text.
    padding = 20
    draw.rounded_rectangle(
        [x - padding, y - padding, x + text_width + padding, y + text_height + padding],
        radius=15,
        fill=(0, 0, 0, 180),
    )

    # Drop shadow first, then the text itself.
    draw.text((x + 2, y + 2), display_text, font=font, fill=(0, 0, 0, 255))
    draw.text((x, y), display_text, font=font, fill=(255, 255, 255, 255))

    return np.array(img)
145
+
146
def create_simple_subtitles(subtitle_data, frame_size, total_duration):
    """Create simple, reliable subtitles using ImageClips.

    For each timed word in *subtitle_data* (dicts with 'word', 'start' and
    'end' keys), render a frame-sized image via create_subtitle_image and
    wrap it in an ImageClip scheduled at the word's start time.

    Args:
        subtitle_data: Iterable of {'word', 'start', 'end'} dicts.
        frame_size: (width, height) of the video frame in pixels.
        total_duration: Accepted for interface compatibility; not used here.

    Returns:
        List of ImageClips, one per subtitle entry.
    """
    clips = []
    for entry in subtitle_data:
        span = entry['end'] - entry['start']
        # Render this word's subtitle frame and schedule it on the timeline.
        frame = create_subtitle_image(entry['word'], frame_size)
        clips.append(ImageClip(frame, duration=span).set_start(entry['start']))
    return clips
166
 
167
def create_title_overlay(title_text, framesize, duration=4):
    """Return a list with one ImageClip showing the title near the top.

    Returns an empty list when *title_text* is None/empty/whitespace, so
    the result can be extended onto a clip list unconditionally.

    Args:
        title_text: Title string; rendered in upper case.
        framesize: (width, height) of the video frame in pixels.
        duration: Overlay duration in seconds.
    """
    if not title_text or not title_text.strip():
        return []
    frame_width, frame_height = framesize

    # Load the Poppins font (cached in /tmp); fall back to PIL's default
    # font if the download or load fails (e.g. no network).
    try:
        font_url = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
        font_path = "/tmp/Poppins-Bold.ttf"
        if not os.path.exists(font_path):
            urllib.request.urlretrieve(font_url, font_path)
        font = ImageFont.truetype(font_path, int(frame_height * 0.06))
    except Exception:  # narrowed from bare except: don't swallow KeyboardInterrupt
        font = ImageFont.load_default()

    base = Image.new("RGBA", (frame_width, frame_height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(base)

    # Simple centered title near the top of the frame.
    text = title_text.upper()
    bbox = draw.textbbox((0, 0), text, font=font)
    text_width = bbox[2] - bbox[0]

    x = (frame_width - text_width) // 2
    y = int(frame_height * 0.1)

    # Drop shadow first, then the title text.
    draw.text((x + 3, y + 3), text, font=font, fill=(0, 0, 0, 200))
    draw.text((x, y), text, font=font, fill=(255, 255, 255, 255))

    return [ImageClip(np.array(base), duration=duration)]
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  def get_random_subclip_and_slow(clip):
199
  subclip_durations = [2, 3, 4]
200
  subclip_duration = random.choice(subclip_durations)
 
278
  generation_cancelled = False
279
  current_video_clip = None
280
  progress(0, desc="Starting...")
 
 
 
281
 
282
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
283
  source_path = 'video_clips'
284
  if not os.path.isdir(source_path):
285
+ return None, "Video clips folder not found"
286
+
287
  output_path = 'exports'
288
  os.makedirs(output_path, exist_ok=True)
289
 
290
  video_extensions = ('.mp4', '.avi', '.mkv', '.mov')
291
  all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
292
  if not all_files:
293
+ return None, "No video files found"
294
+
295
  random.shuffle(all_files)
296
+
 
297
  bg_music_path = None
298
  bg_music_folder_path = 'background_music'
299
  if os.path.isdir(bg_music_folder_path):
300
+ audio_extensions = ('.mp3', '.wav', '.aac')
301
+ possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions)]
302
+ if possible_files:
303
  bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
304
+
 
 
305
  voice_over_path = None
306
+ linelevel_subtitles = None
307
+
308
  if text_input and text_input.strip():
309
  progress(0.1, desc="Generating TTS...")
310
  voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
311
  tts_path, tts_message = generate_tts_audio(text_input, voice_name)
 
 
312
  if tts_path:
313
  voice_over_folder_path = 'voice_over'
314
  os.makedirs(voice_over_folder_path, exist_ok=True)
 
319
  else:
320
  return None, f"TTS failed: {tts_message}"
321
  elif audio_input:
 
 
322
  voice_over_folder_path = 'voice_over'
323
  os.makedirs(voice_over_folder_path, exist_ok=True)
324
  voice_filename = f"uploaded_voiceover_{timestamp}.mp3"
325
  saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
326
  shutil.copy2(audio_input, saved_voice_path)
327
  voice_over_path = saved_voice_path
328
+
329
  if voice_over_path:
330
  try:
331
  progress(0.2, desc="Processing audio...")
 
 
332
  voice_over_audio = AudioFileClip(voice_over_path)
333
  target_duration_seconds = voice_over_audio.duration
334
  linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
 
 
 
335
  except Exception as e:
336
  return None, f"Audio error: {str(e)}"
337
  else:
338
  if not bg_music_path:
339
  return None, "Need text/audio or background music"
340
  target_duration_seconds = duration_minutes * 60
341
+ voice_over_audio = None
342
+
343
  progress(0.3, desc="Preparing audio...")
344
+
 
 
 
345
  audio_tracks = []
346
  if voice_over_audio:
347
  audio_tracks.append(voice_over_audio)
348
+
349
  if bg_music_path:
350
  try:
351
  background_audio = AudioFileClip(bg_music_path)
 
354
  audio_tracks.append(background_audio)
355
  except Exception as e:
356
  print(f"Background music error: {e}")
357
+
358
  final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
359
+
360
  progress(0.4, desc="Setting up video...")
361
+
 
 
362
  if video_quality == "High":
363
  target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
364
  elif video_quality == "Standard":
365
  target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
366
  else:
367
  target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
368
+
369
  progress(0.5, desc="Processing clips...")
370
+
371
  video_clips = []
372
  current_duration = 0
373
  file_index = 0
374
+
375
+ while current_duration < target_duration_seconds:
 
 
 
 
 
 
 
 
 
376
  if file_index >= len(all_files):
377
  file_index = 0
378
  random.shuffle(all_files)
379
+
380
  video_file = all_files[file_index]
381
  file_index += 1
382
+
383
  try:
384
  full_clip = VideoFileClip(os.path.join(source_path, video_file))
385
+
 
 
 
 
386
  if full_clip.h != target_height:
387
  aspect_ratio = full_clip.w / full_clip.h
388
  new_width = int(target_height * aspect_ratio)
 
392
  full_clip = full_clip.resize((new_width, adjusted_height))
393
  else:
394
  full_clip = ensure_even_dimensions(full_clip)
395
+
396
  subclip = get_random_subclip_and_slow(full_clip)
397
  remaining_duration = target_duration_seconds - current_duration
398
+
399
  if subclip.duration > remaining_duration:
400
  subclip = subclip.subclip(0, remaining_duration)
401
+
402
  video_clips.append(ensure_even_dimensions(subclip))
403
  current_duration += subclip.duration
 
404
  except Exception as e:
405
+ print(f"Error processing {video_file}: {e}")
406
  continue
407
+
 
 
 
 
 
 
 
408
  if not video_clips:
409
  return None, "No clips processed"
410
 
411
+ # Ensure exact duration match
412
  total_video_duration = sum(clip.duration for clip in video_clips)
413
  duration_diff = total_video_duration - target_duration_seconds
414
+
415
  if abs(duration_diff) > 0.1:
416
  if duration_diff > 0:
417
  trim_amount = duration_diff
418
+ video_clips[-1] = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
 
419
  else:
420
  extend_amount = abs(duration_diff)
421
+ video_clips[-1] = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
422
+
423
  progress(0.6, desc="Applying transitions...")
424
  transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
425
+
426
  processed_clips = []
427
  for i in range(len(video_clips)):
428
  if i == 0:
 
437
  else:
438
  _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
439
  processed_clips.append(clip_with_transition)
440
+
441
  progress(0.7, desc="Concatenating...")
442
+
 
 
 
 
 
 
 
443
  if transition_type == "Snap Cut":
444
  final_video_only = concatenate_videoclips(processed_clips, method="compose")
445
  else:
446
  final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
447
+
448
  final_video_only = ensure_even_dimensions(final_video_only)
449
+
450
+ # Fix black screen - loop if needed
451
+ if final_audio and final_video_only.duration < final_audio.duration:
452
+ final_video_only = final_video_only.fx(vfx.loop, duration=final_audio.duration)
453
+
454
  progress(0.8, desc="Adding overlays...")
455
+
456
+ # Create subtitle clips using reliable method
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  all_clips = [final_video_only.set_opacity(0.65)]
458
+
459
+ if linelevel_subtitles:
460
+ print(f"Creating {len(linelevel_subtitles)} subtitle sections")
461
+ subtitle_clips = create_simple_subtitles(linelevel_subtitles, final_video_only.size, final_video_only.duration)
462
+ all_clips.extend(subtitle_clips)
463
+ print(f"Added {len(subtitle_clips)} subtitle clips")
464
+
465
  if title_text and title_text.strip():
466
  title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
467
  all_clips.extend(title_clips)
468
+
469
  final_video = CompositeVideoClip(all_clips)
470
+
471
  if final_audio:
472
  final_video = final_video.set_audio(final_audio)
473
+
474
  progress(0.9, desc="Exporting...")
475
+
 
 
 
 
 
 
476
  output_filename = f'video_{timestamp}.mp4'
477
  final_output_path = os.path.join(output_path, output_filename)
478
+
479
  try:
480
  final_video.write_videofile(
481
  final_output_path,
 
486
  bitrate=bitrate,
487
  audio_bitrate="128k",
488
  threads=8,
489
+ ffmpeg_params=["-crf", crf, "-pix_fmt", "yuv420p", "-movflags", "+faststart"]
490
  )
491
  except Exception as e:
 
 
492
  return None, f"Export error: {str(e)}"
493
+
494
  progress(1.0, desc="Done")
495
+
 
 
 
 
 
 
 
496
  try:
497
  final_video.close()
498
  if voice_over_audio:
499
  voice_over_audio.close()
 
500
  except:
501
  pass
502
+
503
+ audio_source = "TTS" if text_input else ("Uploaded" if audio_input else "BGM")
504
+ summary = f"Complete\n{output_filename}\n{audio_source}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subs"
 
 
 
 
 
505
  return final_output_path, summary
506
 
507
  with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
508
  gr.Markdown("# 🎬 AI Video Generator")
 
509
 
510
  with gr.Row():
511
  with gr.Column():
512
+ text_input = gr.Textbox(label="Text for TTS", lines=4)
513
  voice_dropdown = gr.Dropdown(
514
  choices=[(f"{v['name']} - {v['description']}", k) for k, v in AVAILABLE_VOICES.items()],
515
  value="Puck",
516
+ label="Voice"
517
  )
518
+ audio_input = gr.Audio(type="filepath", label="Or Upload Audio")
519
+ title_input = gr.Textbox(label="Title (Optional)", lines=2)
520
+ duration_slider = gr.Slider(0.5, 10, 2, 0.5, label="Duration (min)")
521
+ quality_radio = gr.Radio(["High", "Standard", "Preview"], value="High", label="Quality")
522
  transition_radio = gr.Radio(
523
  ["Smooth Blend", "Ken Burns Zoom", "Whip Pan", "Dreamy Fade", "Snap Cut"],
524
  value="Smooth Blend",
525
+ label="Transition"
526
  )
527
  with gr.Row():
528
+ submit_btn = gr.Button("Generate Video", variant="primary")
529
+ stop_btn = gr.Button("Stop", variant="stop")
530
 
531
  with gr.Column():
532
+ video_output = gr.Video(label="Output")
533
  summary_output = gr.Textbox(label="Status", lines=8)
534
 
535
  submit_btn.click(
 
540
  stop_btn.click(fn=cancel_generation, outputs=[summary_output, video_output])
541
 
542
# Entry point: launch the Gradio UI only when this file is executed directly
# (importing the module must not start a server).
if __name__ == "__main__":
    interface.launch(
        server_name="0.0.0.0",  # bind all interfaces so the hosted Space is reachable
        server_port=7860,       # the port Hugging Face Spaces expects
        show_error=True,        # surface tracebacks in the UI for easier debugging
    )