Shreevathsam commited on
Commit
ddc8837
·
verified ·
1 Parent(s): c534833

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -157
app.py CHANGED
@@ -5,22 +5,25 @@ import whisper
5
  import shutil
6
  import wave
7
  import base64
8
- from moviepy.editor import (VideoFileClip, AudioFileClip, concatenate_videoclips,
9
- CompositeVideoClip, CompositeAudioClip, ImageClip)
10
  import moviepy.audio.fx.all as afx
11
  import moviepy.video.fx.all as vfx
12
  import gradio as gr
13
  from PIL import Image, ImageDraw, ImageFont
14
  import numpy as np
 
15
  import urllib.request
16
  from google import genai
17
  from google.genai import types
18
 
 
19
  os.makedirs('video_clips', exist_ok=True)
20
  os.makedirs('background_music', exist_ok=True)
21
  os.makedirs('voice_over', exist_ok=True)
22
  os.makedirs('exports', exist_ok=True)
23
 
 
24
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
25
  if GOOGLE_API_KEY:
26
  os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
@@ -74,7 +77,7 @@ def generate_tts_audio(text_input, voice_name="Puck"):
74
  return None, f"Error: {str(e)}"
75
 
76
  def split_text_into_lines(data):
77
- MaxChars, MaxDuration, MaxGap = 40, 2.5, 1.5
78
  subtitles, line, line_duration = [], [], 0
79
  for idx, word_data in enumerate(data):
80
  line.append(word_data)
@@ -101,100 +104,188 @@ def split_text_into_lines(data):
101
  })
102
  return subtitles
103
 
104
- def create_subtitle_image(text, frame_size, fontsize=42):
105
- """Create subtitle as PIL Image - more reliable than TextClip"""
106
- frame_width, frame_height = frame_size
107
-
108
- # Load font
109
- FONT_PATH = None
110
- try:
111
- FONT_URL = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
112
- FONT_PATH = "/tmp/Poppins-Bold.ttf"
113
- if not os.path.exists(FONT_PATH):
114
- urllib.request.urlretrieve(FONT_URL, FONT_PATH)
115
- font = ImageFont.truetype(FONT_PATH, fontsize)
116
- except:
117
- font = ImageFont.load_default()
118
-
119
- # Create transparent image
120
- img = Image.new('RGBA', (frame_width, frame_height), (0, 0, 0, 0))
121
- draw = ImageDraw.Draw(img)
122
-
123
- # Get text size
124
- bbox = draw.textbbox((0, 0), text.upper(), font=font)
125
- text_width = bbox[2] - bbox[0]
126
- text_height = bbox[3] - bbox[1]
127
-
128
- # Position at bottom center
129
- x = (frame_width - text_width) // 2
130
- y = int(frame_height * 0.75)
131
-
132
- # Draw background
133
- padding = 20
134
- bg_x1 = x - padding
135
- bg_y1 = y - padding
136
- bg_x2 = x + text_width + padding
137
- bg_y2 = y + text_height + padding
138
- draw.rounded_rectangle([bg_x1, bg_y1, bg_x2, bg_y2], radius=15, fill=(0, 0, 0, 180))
139
-
140
- # Draw text with shadow
141
- draw.text((x+2, y+2), text.upper(), font=font, fill=(0, 0, 0, 255))
142
- draw.text((x, y), text.upper(), font=font, fill=(255, 255, 255, 255))
143
-
144
- return np.array(img)
145
-
146
- def create_simple_subtitles(subtitle_data, frame_size, total_duration):
147
- """Create simple, reliable subtitles using ImageClips"""
148
- subtitle_clips = []
149
-
150
- for item in subtitle_data:
151
- text = item['word']
152
- start_time = item['start']
153
- end_time = item['end']
154
- duration = end_time - start_time
155
-
156
- # Create subtitle image
157
- img_array = create_subtitle_image(text, frame_size)
158
-
159
- # Create ImageClip
160
- clip = ImageClip(img_array, duration=duration)
161
- clip = clip.set_start(start_time)
162
-
163
- subtitle_clips.append(clip)
164
-
165
- return subtitle_clips
166
 
167
  def create_title_overlay(title_text, framesize, duration=4):
168
  if not title_text or not title_text.strip():
169
  return []
170
  frame_width, frame_height = framesize
171
-
172
- try:
173
- FONT_URL = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
174
- FONT_PATH = "/tmp/Poppins-Bold.ttf"
175
- if not os.path.exists(FONT_PATH):
176
  urllib.request.urlretrieve(FONT_URL, FONT_PATH)
177
- font = ImageFont.truetype(FONT_PATH, int(frame_height * 0.06))
178
- except:
179
- font = ImageFont.load_default()
180
-
 
 
 
 
 
 
 
 
 
 
181
  base = Image.new("RGBA", (frame_width, frame_height), (0, 0, 0, 0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  draw = ImageDraw.Draw(base)
183
-
184
- # Simple centered title
185
- text = title_text.upper()
186
- bbox = draw.textbbox((0, 0), text, font=font)
187
- text_width = bbox[2] - bbox[0]
188
-
189
- x = (frame_width - text_width) // 2
190
- y = int(frame_height * 0.1)
191
-
192
- # Shadow and text
193
- draw.text((x+3, y+3), text, font=font, fill=(0, 0, 0, 200))
194
- draw.text((x, y), text, font=font, fill=(255, 255, 255, 255))
195
-
196
  return [ImageClip(np.array(base), duration=duration)]
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  def get_random_subclip_and_slow(clip):
199
  subclip_durations = [2, 3, 4]
200
  subclip_duration = random.choice(subclip_durations)
@@ -278,12 +369,14 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
278
  generation_cancelled = False
279
  current_video_clip = None
280
  progress(0, desc="Starting...")
281
-
 
282
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 
 
283
  source_path = 'video_clips'
284
  if not os.path.isdir(source_path):
285
  return None, "Video clips folder not found"
286
-
287
  output_path = 'exports'
288
  os.makedirs(output_path, exist_ok=True)
289
 
@@ -291,25 +384,30 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
291
  all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
292
  if not all_files:
293
  return None, "No video files found"
294
-
295
  random.shuffle(all_files)
296
-
 
297
  bg_music_path = None
 
 
298
  bg_music_folder_path = 'background_music'
299
  if os.path.isdir(bg_music_folder_path):
300
- audio_extensions = ('.mp3', '.wav', '.aac')
301
- possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions)]
302
- if possible_files:
303
  bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
304
-
305
- voice_over_path = None
306
  linelevel_subtitles = None
307
-
308
  if text_input and text_input.strip():
309
  progress(0.1, desc="Generating TTS...")
310
  voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
311
  tts_path, tts_message = generate_tts_audio(text_input, voice_name)
 
 
312
  if tts_path:
 
313
  voice_over_folder_path = 'voice_over'
314
  os.makedirs(voice_over_folder_path, exist_ok=True)
315
  voice_filename = f"tts_voiceover_{timestamp}.wav"
@@ -319,70 +417,87 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
319
  else:
320
  return None, f"TTS failed: {tts_message}"
321
  elif audio_input:
 
 
322
  voice_over_folder_path = 'voice_over'
323
  os.makedirs(voice_over_folder_path, exist_ok=True)
324
  voice_filename = f"uploaded_voiceover_{timestamp}.mp3"
325
  saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
326
  shutil.copy2(audio_input, saved_voice_path)
327
  voice_over_path = saved_voice_path
328
-
329
  if voice_over_path:
330
  try:
331
  progress(0.2, desc="Processing audio...")
 
 
332
  voice_over_audio = AudioFileClip(voice_over_path)
333
  target_duration_seconds = voice_over_audio.duration
334
  linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
 
 
 
335
  except Exception as e:
336
  return None, f"Audio error: {str(e)}"
337
  else:
338
  if not bg_music_path:
339
  return None, "Need text/audio or background music"
340
  target_duration_seconds = duration_minutes * 60
341
- voice_over_audio = None
342
-
343
  progress(0.3, desc="Preparing audio...")
344
-
 
 
 
345
  audio_tracks = []
346
  if voice_over_audio:
347
  audio_tracks.append(voice_over_audio)
348
-
349
  if bg_music_path:
350
  try:
351
  background_audio = AudioFileClip(bg_music_path)
 
352
  background_audio = background_audio.fx(afx.volumex, 0.10)
353
  background_audio = background_audio.fx(afx.audio_loop, duration=target_duration_seconds)
354
  audio_tracks.append(background_audio)
355
  except Exception as e:
356
  print(f"Background music error: {e}")
357
-
358
  final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
359
-
360
  progress(0.4, desc="Setting up video...")
361
-
 
 
362
  if video_quality == "High":
363
  target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
364
  elif video_quality == "Standard":
365
  target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
366
  else:
367
  target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
368
-
369
  progress(0.5, desc="Processing clips...")
370
-
371
  video_clips = []
372
  current_duration = 0
373
  file_index = 0
374
-
375
- while current_duration < target_duration_seconds:
 
 
 
 
 
 
 
 
 
376
  if file_index >= len(all_files):
377
  file_index = 0
378
  random.shuffle(all_files)
379
-
380
  video_file = all_files[file_index]
381
  file_index += 1
382
-
383
  try:
384
  full_clip = VideoFileClip(os.path.join(source_path, video_file))
385
-
 
 
 
 
386
  if full_clip.h != target_height:
387
  aspect_ratio = full_clip.w / full_clip.h
388
  new_width = int(target_height * aspect_ratio)
@@ -392,37 +507,42 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
392
  full_clip = full_clip.resize((new_width, adjusted_height))
393
  else:
394
  full_clip = ensure_even_dimensions(full_clip)
395
-
396
  subclip = get_random_subclip_and_slow(full_clip)
397
  remaining_duration = target_duration_seconds - current_duration
398
-
399
  if subclip.duration > remaining_duration:
400
  subclip = subclip.subclip(0, remaining_duration)
401
-
402
  video_clips.append(ensure_even_dimensions(subclip))
403
  current_duration += subclip.duration
 
404
  except Exception as e:
405
- print(f"Error processing {video_file}: {e}")
406
  continue
407
-
 
 
 
 
 
 
 
408
  if not video_clips:
409
  return None, "No clips processed"
410
 
411
- # Ensure exact duration match
412
  total_video_duration = sum(clip.duration for clip in video_clips)
413
  duration_diff = total_video_duration - target_duration_seconds
414
-
415
  if abs(duration_diff) > 0.1:
416
  if duration_diff > 0:
417
  trim_amount = duration_diff
418
- video_clips[-1] = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
 
419
  else:
420
  extend_amount = abs(duration_diff)
421
- video_clips[-1] = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
 
422
 
423
  progress(0.6, desc="Applying transitions...")
424
  transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
425
-
426
  processed_clips = []
427
  for i in range(len(video_clips)):
428
  if i == 0:
@@ -437,45 +557,70 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
437
  else:
438
  _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
439
  processed_clips.append(clip_with_transition)
440
-
441
  progress(0.7, desc="Concatenating...")
442
-
 
 
 
 
 
 
 
443
  if transition_type == "Snap Cut":
444
  final_video_only = concatenate_videoclips(processed_clips, method="compose")
445
  else:
446
  final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
447
-
448
  final_video_only = ensure_even_dimensions(final_video_only)
 
449
 
450
- # Fix black screen - loop if needed
451
  if final_audio and final_video_only.duration < final_audio.duration:
452
  final_video_only = final_video_only.fx(vfx.loop, duration=final_audio.duration)
453
 
454
  progress(0.8, desc="Adding overlays...")
455
-
456
- # Create subtitle clips using reliable method
457
- all_clips = [final_video_only.set_opacity(0.65)]
458
-
 
 
 
 
459
  if linelevel_subtitles:
460
- print(f"Creating {len(linelevel_subtitles)} subtitle sections")
461
- subtitle_clips = create_simple_subtitles(linelevel_subtitles, final_video_only.size, final_video_only.duration)
462
- all_clips.extend(subtitle_clips)
463
- print(f"Added {len(subtitle_clips)} subtitle clips")
464
-
 
 
 
 
 
 
 
 
 
 
 
 
465
  if title_text and title_text.strip():
466
  title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
467
  all_clips.extend(title_clips)
468
-
469
  final_video = CompositeVideoClip(all_clips)
470
-
471
  if final_audio:
472
  final_video = final_video.set_audio(final_audio)
473
-
474
  progress(0.9, desc="Exporting...")
475
-
 
 
 
 
 
 
476
  output_filename = f'video_{timestamp}.mp4'
477
  final_output_path = os.path.join(output_path, output_filename)
478
-
479
  try:
480
  final_video.write_videofile(
481
  final_output_path,
@@ -486,50 +631,66 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
486
  bitrate=bitrate,
487
  audio_bitrate="128k",
488
  threads=8,
489
- ffmpeg_params=["-crf", crf, "-pix_fmt", "yuv420p", "-movflags", "+faststart"]
490
  )
491
  except Exception as e:
 
 
492
  return None, f"Export error: {str(e)}"
493
-
494
  progress(1.0, desc="Done")
495
-
 
 
 
 
 
 
 
496
  try:
497
  final_video.close()
498
  if voice_over_audio:
499
  voice_over_audio.close()
 
500
  except:
501
  pass
502
-
503
- audio_source = "TTS" if text_input else ("Uploaded" if audio_input else "BGM")
504
- summary = f"Complete\n{output_filename}\n{audio_source}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subs"
 
 
 
 
 
505
  return final_output_path, summary
506
 
 
507
  with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
508
  gr.Markdown("# 🎬 AI Video Generator")
 
509
 
510
  with gr.Row():
511
  with gr.Column():
512
- text_input = gr.Textbox(label="Text for TTS", lines=4)
513
  voice_dropdown = gr.Dropdown(
514
  choices=[(f"{v['name']} - {v['description']}", k) for k, v in AVAILABLE_VOICES.items()],
515
  value="Puck",
516
- label="Voice"
517
  )
518
- audio_input = gr.Audio(type="filepath", label="Or Upload Audio")
519
- title_input = gr.Textbox(label="Title (Optional)", lines=2)
520
- duration_slider = gr.Slider(0.5, 10, 2, 0.5, label="Duration (min)")
521
- quality_radio = gr.Radio(["High", "Standard", "Preview"], value="High", label="Quality")
522
  transition_radio = gr.Radio(
523
  ["Smooth Blend", "Ken Burns Zoom", "Whip Pan", "Dreamy Fade", "Snap Cut"],
524
  value="Smooth Blend",
525
- label="Transition"
526
  )
527
  with gr.Row():
528
- submit_btn = gr.Button("Generate Video", variant="primary")
529
- stop_btn = gr.Button("Stop", variant="stop")
530
 
531
  with gr.Column():
532
- video_output = gr.Video(label="Output")
533
  summary_output = gr.Textbox(label="Status", lines=8)
534
 
535
  submit_btn.click(
@@ -539,5 +700,10 @@ with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
539
  )
540
  stop_btn.click(fn=cancel_generation, outputs=[summary_output, video_output])
541
 
 
542
  if __name__ == "__main__":
543
- interface.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
 
 
 
 
5
  import shutil
6
  import wave
7
  import base64
8
+ from moviepy.editor import (VideoFileClip, AudioFileClip, TextClip,
9
+ concatenate_videoclips, CompositeVideoClip, CompositeAudioClip, ImageClip)
10
  import moviepy.audio.fx.all as afx
11
  import moviepy.video.fx.all as vfx
12
  import gradio as gr
13
  from PIL import Image, ImageDraw, ImageFont
14
  import numpy as np
15
+ from functools import lru_cache
16
  import urllib.request
17
  from google import genai
18
  from google.genai import types
19
 
20
+ # CHANGED: Create local directories instead of Google Drive paths
21
  os.makedirs('video_clips', exist_ok=True)
22
  os.makedirs('background_music', exist_ok=True)
23
  os.makedirs('voice_over', exist_ok=True)
24
  os.makedirs('exports', exist_ok=True)
25
 
26
+ # CHANGED: Get API key from environment variable (secure method)
27
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
28
  if GOOGLE_API_KEY:
29
  os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
 
77
  return None, f"Error: {str(e)}"
78
 
79
  def split_text_into_lines(data):
80
+ MaxChars, MaxDuration, MaxGap = 60, 2.5, 1.5
81
  subtitles, line, line_duration = [], [], 0
82
  for idx, word_data in enumerate(data):
83
  line.append(word_data)
 
104
  })
105
  return subtitles
106
 
107
+ @lru_cache(maxsize=1000)
108
+ def get_cached_text_clip(text, font, fontsize, color):
109
+ return TextClip(text, font=font, fontsize=fontsize, color=color)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  def create_title_overlay(title_text, framesize, duration=4):
112
  if not title_text or not title_text.strip():
113
  return []
114
  frame_width, frame_height = framesize
115
+ FONT_URL = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
116
+ FONT_PATH = "/tmp/Poppins-Bold.ttf"
117
+ if not os.path.exists(FONT_PATH):
118
+ try:
 
119
  urllib.request.urlretrieve(FONT_URL, FONT_PATH)
120
+ except:
121
+ FONT_PATH = None
122
+ TOP_MARGIN = int(frame_height * 0.115)
123
+ FONT_SIZE = int(frame_height * 0.042)
124
+ STROKE_WIDTH = max(1, int(frame_height * 0.003))
125
+ LINE_SPACING = max(4, int(frame_height * 0.008))
126
+ def load_font(size):
127
+ try:
128
+ if FONT_PATH and os.path.exists(FONT_PATH):
129
+ return ImageFont.truetype(FONT_PATH, size)
130
+ return ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", size)
131
+ except:
132
+ return ImageFont.load_default()
133
+ font_obj = load_font(FONT_SIZE)
134
  base = Image.new("RGBA", (frame_width, frame_height), (0, 0, 0, 0))
135
+ temp_img = Image.new("RGBA", (frame_width, frame_height), (0,0,0,0))
136
+ temp_draw = ImageDraw.Draw(temp_img)
137
+ def measure_text(text, font):
138
+ try:
139
+ bbox = temp_draw.textbbox((0,0), text, font=font, stroke_width=STROKE_WIDTH)
140
+ return bbox[2]-bbox[0], bbox[3]-bbox[1]
141
+ except:
142
+ return 100, 50
143
+ def wrap_text(text, font, max_width):
144
+ words = text.upper().split()
145
+ lines, current = [], []
146
+ for word in words:
147
+ test_line = " ".join(current + [word])
148
+ w, _ = measure_text(test_line, font)
149
+ if w <= max_width:
150
+ current.append(word)
151
+ else:
152
+ if current:
153
+ lines.append(" ".join(current))
154
+ current = [word]
155
+ else:
156
+ lines.append(word)
157
+ current = []
158
+ if current:
159
+ lines.append(" ".join(current))
160
+ return lines[:4]
161
+ lines = wrap_text(title_text, font_obj, frame_width * 0.90)
162
+ line_heights = [measure_text(line, font_obj)[1] for line in lines]
163
+ y_start = TOP_MARGIN
164
+ x_center = frame_width // 2
165
  draw = ImageDraw.Draw(base)
166
+ y = y_start
167
+ for i, line in enumerate(lines):
168
+ w, h = measure_text(line, font_obj)
169
+ x = x_center - w // 2
170
+ draw.text((x+2, y+2), line, font=font_obj, fill=(0,0,0,180))
171
+ draw.text((x, y), line, font=font_obj, fill=(255,255,255,255), stroke_width=STROKE_WIDTH, stroke_fill=(0,0,0,255))
172
+ y += line_heights[i] + LINE_SPACING
 
 
 
 
 
 
173
  return [ImageClip(np.array(base), duration=duration)]
174
 
175
+ def create_caption(textJSON, framesize, font="Helvetica-Bold", fontsize=14, color='white'):
176
+ full_duration = textJSON['end'] - textJSON['start']
177
+ word_clips = []
178
+ xy_textclips_positions = []
179
+ frame_width, frame_height = framesize
180
+ max_line_width = frame_width * 0.8
181
+ lines, current_line, current_line_width = [], [], 0
182
+ for wordJSON in textJSON['textcontents']:
183
+ word_upper = wordJSON['word'].upper()
184
+ temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
185
+ temp_space = get_cached_text_clip(" ", font, fontsize, color)
186
+ word_width, word_height = temp_word.size
187
+ space_width, _ = temp_space.size
188
+ if current_line_width + word_width + space_width > max_line_width and current_line:
189
+ lines.append({'words': current_line.copy(), 'width': current_line_width, 'height': word_height})
190
+ current_line = [wordJSON]
191
+ current_line_width = word_width + space_width
192
+ else:
193
+ current_line.append(wordJSON)
194
+ current_line_width += word_width + space_width
195
+ if current_line:
196
+ word_upper = current_line[0]['word'].upper()
197
+ temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
198
+ _, word_height = temp_word.size
199
+ lines.append({'words': current_line, 'width': current_line_width, 'height': word_height})
200
+ total_text_height = sum(line['height'] for line in lines) + (len(lines) - 1) * 3
201
+ subtitle_y_position = int(frame_height * 0.65)
202
+ current_y = subtitle_y_position
203
+ if lines:
204
+ shadow_padding = 25
205
+ shadow_height_extra = 15
206
+ total_subtitle_width = max(line['width'] for line in lines)
207
+ bg_width = int(total_subtitle_width + shadow_padding * 2)
208
+ bg_height = int(total_text_height + shadow_height_extra * 2)
209
+ img = Image.new('RGBA', (bg_width, bg_height), (0, 0, 0, 0))
210
+ draw = ImageDraw.Draw(img)
211
+ draw.rounded_rectangle([(0, 0), (bg_width-1, bg_height-1)], radius=15, fill=(0, 0, 0, 128))
212
+ img_array = np.array(img)
213
+ shadow_bg = ImageClip(img_array, duration=full_duration).set_start(textJSON['start'])
214
+ shadow_x = (frame_width - total_subtitle_width) / 2 - shadow_padding
215
+ shadow_y = subtitle_y_position - shadow_height_extra
216
+ shadow_bg = shadow_bg.set_position((shadow_x, shadow_y))
217
+ word_clips.append(shadow_bg)
218
+ for line in lines:
219
+ line_words = line['words']
220
+ word_dimensions = []
221
+ for wordJSON in line_words:
222
+ word_upper = wordJSON['word'].upper()
223
+ temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
224
+ temp_space = get_cached_text_clip(" ", font, fontsize, color)
225
+ word_width, word_height = temp_word.size
226
+ space_width, _ = temp_space.size
227
+ word_dimensions.append({
228
+ 'word_data': wordJSON,
229
+ 'word_width': word_width,
230
+ 'word_height': word_height,
231
+ 'space_width': space_width,
232
+ 'word_upper': word_upper
233
+ })
234
+ line_start_x = (frame_width - line['width']) / 2
235
+ current_x = line_start_x
236
+ for word_dim in word_dimensions:
237
+ wordJSON = word_dim['word_data']
238
+ word_width = word_dim['word_width']
239
+ word_height = word_dim['word_height']
240
+ space_width = word_dim['space_width']
241
+ word_upper = word_dim['word_upper']
242
+ shadow_text = get_cached_text_clip(word_upper, font, fontsize, 'black')
243
+ shadow_text = shadow_text.set_start(textJSON['start']).set_duration(full_duration)
244
+ shadow_text = shadow_text.set_position((current_x + 1, current_y + 1)).set_opacity(0.3)
245
+ word_clips.append(shadow_text)
246
+ word_clip = get_cached_text_clip(word_upper, font, fontsize, color)
247
+ word_clip = word_clip.set_start(textJSON['start']).set_duration(full_duration)
248
+ word_clip = word_clip.set_position((current_x, current_y))
249
+ space_clip = get_cached_text_clip(" ", font, fontsize, color)
250
+ space_clip = space_clip.set_start(textJSON['start']).set_duration(full_duration)
251
+ space_clip = space_clip.set_position((current_x + word_width, current_y))
252
+ xy_textclips_positions.append({
253
+ "x_pos": current_x,
254
+ "y_pos": current_y,
255
+ "width": word_width,
256
+ "height": word_height,
257
+ "word": word_upper,
258
+ "start": wordJSON['start'],
259
+ "end": wordJSON['end'],
260
+ "duration": wordJSON['end'] - wordJSON['start']
261
+ })
262
+ word_clips.append(word_clip)
263
+ word_clips.append(space_clip)
264
+ current_x += word_width + space_width
265
+ current_y += line['height'] + 3
266
+ for highlight_word in xy_textclips_positions:
267
+ bg_width = int(highlight_word['width'] + 16)
268
+ bg_height = int(highlight_word['height'] + 8)
269
+ img = Image.new('RGBA', (bg_width, bg_height), (0, 0, 0, 0))
270
+ draw = ImageDraw.Draw(img)
271
+ draw.rounded_rectangle([(0, 0), (bg_width-1, bg_height-1)], radius=8, fill=(147, 0, 211, 180))
272
+ img_array = np.array(img)
273
+ bg_clip = ImageClip(img_array, duration=highlight_word['duration'])
274
+ bg_clip = bg_clip.set_start(highlight_word['start'])
275
+ bg_x = highlight_word['x_pos'] - 8
276
+ bg_y = highlight_word['y_pos'] - 4
277
+ bg_clip = bg_clip.set_position((bg_x, bg_y))
278
+ shadow_highlight = get_cached_text_clip(highlight_word['word'], font, fontsize, 'black')
279
+ shadow_highlight = shadow_highlight.set_start(highlight_word['start']).set_duration(highlight_word['duration'])
280
+ shadow_highlight = shadow_highlight.set_position((highlight_word['x_pos'] + 1, highlight_word['y_pos'] + 1)).set_opacity(0.4)
281
+ word_clip_highlight = get_cached_text_clip(highlight_word['word'], font, fontsize, 'white')
282
+ word_clip_highlight = word_clip_highlight.set_start(highlight_word['start']).set_duration(highlight_word['duration'])
283
+ word_clip_highlight = word_clip_highlight.set_position((highlight_word['x_pos'], highlight_word['y_pos']))
284
+ word_clips.append(bg_clip)
285
+ word_clips.append(shadow_highlight)
286
+ word_clips.append(word_clip_highlight)
287
+ return word_clips
288
+
289
  def get_random_subclip_and_slow(clip):
290
  subclip_durations = [2, 3, 4]
291
  subclip_duration = random.choice(subclip_durations)
 
369
  generation_cancelled = False
370
  current_video_clip = None
371
  progress(0, desc="Starting...")
372
+ if generation_cancelled:
373
+ return None, "Generation cancelled"
374
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
375
+
376
+ # CHANGED: Use local paths instead of Google Drive
377
  source_path = 'video_clips'
378
  if not os.path.isdir(source_path):
379
  return None, "Video clips folder not found"
 
380
  output_path = 'exports'
381
  os.makedirs(output_path, exist_ok=True)
382
 
 
384
  all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
385
  if not all_files:
386
  return None, "No video files found"
 
387
  random.shuffle(all_files)
388
+ if generation_cancelled:
389
+ return None, "Generation cancelled"
390
  bg_music_path = None
391
+
392
+ # CHANGED: Use local background_music folder
393
  bg_music_folder_path = 'background_music'
394
  if os.path.isdir(bg_music_folder_path):
395
+ audio_extensions = ('.mp3', '.wav', '.m4a', '.aac')
396
+ possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions) and not f.startswith('voiceover_')]
397
+ if len(possible_files) >= 1:
398
  bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
399
+ target_duration_seconds = 0
400
+ voice_over_audio = None
401
  linelevel_subtitles = None
402
+ voice_over_path = None
403
  if text_input and text_input.strip():
404
  progress(0.1, desc="Generating TTS...")
405
  voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
406
  tts_path, tts_message = generate_tts_audio(text_input, voice_name)
407
+ if generation_cancelled:
408
+ return None, "Generation cancelled"
409
  if tts_path:
410
+ # CHANGED: Use local voice_over folder
411
  voice_over_folder_path = 'voice_over'
412
  os.makedirs(voice_over_folder_path, exist_ok=True)
413
  voice_filename = f"tts_voiceover_{timestamp}.wav"
 
417
  else:
418
  return None, f"TTS failed: {tts_message}"
419
  elif audio_input:
420
+ if generation_cancelled:
421
+ return None, "Generation cancelled"
422
  voice_over_folder_path = 'voice_over'
423
  os.makedirs(voice_over_folder_path, exist_ok=True)
424
  voice_filename = f"uploaded_voiceover_{timestamp}.mp3"
425
  saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
426
  shutil.copy2(audio_input, saved_voice_path)
427
  voice_over_path = saved_voice_path
 
428
  if voice_over_path:
429
  try:
430
  progress(0.2, desc="Processing audio...")
431
+ if generation_cancelled:
432
+ return None, "Generation cancelled"
433
  voice_over_audio = AudioFileClip(voice_over_path)
434
  target_duration_seconds = voice_over_audio.duration
435
  linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
436
+ if generation_cancelled:
437
+ voice_over_audio.close()
438
+ return None, "Generation cancelled"
439
  except Exception as e:
440
  return None, f"Audio error: {str(e)}"
441
  else:
442
  if not bg_music_path:
443
  return None, "Need text/audio or background music"
444
  target_duration_seconds = duration_minutes * 60
 
 
445
  progress(0.3, desc="Preparing audio...")
446
+ if generation_cancelled:
447
+ if voice_over_audio:
448
+ voice_over_audio.close()
449
+ return None, "Generation cancelled"
450
  audio_tracks = []
451
  if voice_over_audio:
452
  audio_tracks.append(voice_over_audio)
 
453
  if bg_music_path:
454
  try:
455
  background_audio = AudioFileClip(bg_music_path)
456
+ # CHANGED: Increased volume from 0.015 to 0.10 (louder background music)
457
  background_audio = background_audio.fx(afx.volumex, 0.10)
458
  background_audio = background_audio.fx(afx.audio_loop, duration=target_duration_seconds)
459
  audio_tracks.append(background_audio)
460
  except Exception as e:
461
  print(f"Background music error: {e}")
 
462
  final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
 
463
  progress(0.4, desc="Setting up video...")
464
+ if generation_cancelled:
465
+ cleanup_resources()
466
+ return None, "Generation cancelled"
467
  if video_quality == "High":
468
  target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
469
  elif video_quality == "Standard":
470
  target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
471
  else:
472
  target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
 
473
  progress(0.5, desc="Processing clips...")
 
474
  video_clips = []
475
  current_duration = 0
476
  file_index = 0
477
+ safety_counter = 0
478
+ max_iterations = len(all_files) * 3
479
+ while current_duration < target_duration_seconds and safety_counter < max_iterations:
480
+ if generation_cancelled:
481
+ for clip in video_clips:
482
+ try:
483
+ clip.close()
484
+ except:
485
+ pass
486
+ cleanup_resources()
487
+ return None, "Generation cancelled"
488
  if file_index >= len(all_files):
489
  file_index = 0
490
  random.shuffle(all_files)
 
491
  video_file = all_files[file_index]
492
  file_index += 1
493
+ safety_counter += 1
494
  try:
495
  full_clip = VideoFileClip(os.path.join(source_path, video_file))
496
+ current_video_clip = full_clip
497
+ if generation_cancelled:
498
+ full_clip.close()
499
+ cleanup_resources()
500
+ return None, "Generation cancelled"
501
  if full_clip.h != target_height:
502
  aspect_ratio = full_clip.w / full_clip.h
503
  new_width = int(target_height * aspect_ratio)
 
507
  full_clip = full_clip.resize((new_width, adjusted_height))
508
  else:
509
  full_clip = ensure_even_dimensions(full_clip)
 
510
  subclip = get_random_subclip_and_slow(full_clip)
511
  remaining_duration = target_duration_seconds - current_duration
 
512
  if subclip.duration > remaining_duration:
513
  subclip = subclip.subclip(0, remaining_duration)
 
514
  video_clips.append(ensure_even_dimensions(subclip))
515
  current_duration += subclip.duration
516
+ progress(0.5 + (safety_counter * 0.1 / max_iterations), desc=f"Clip {len(video_clips)}")
517
  except Exception as e:
518
+ print(f"Error: {e}")
519
  continue
520
+ if generation_cancelled:
521
+ for clip in video_clips:
522
+ try:
523
+ clip.close()
524
+ except:
525
+ pass
526
+ cleanup_resources()
527
+ return None, "Generation cancelled"
528
  if not video_clips:
529
  return None, "No clips processed"
530
 
531
+ # FIXED: Ensure exact duration match to prevent black screens
532
  total_video_duration = sum(clip.duration for clip in video_clips)
533
  duration_diff = total_video_duration - target_duration_seconds
 
534
  if abs(duration_diff) > 0.1:
535
  if duration_diff > 0:
536
  trim_amount = duration_diff
537
+ new_last_clip = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
538
+ video_clips[-1] = new_last_clip
539
  else:
540
  extend_amount = abs(duration_diff)
541
+ new_last_clip = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
542
+ video_clips[-1] = new_last_clip
543
 
544
  progress(0.6, desc="Applying transitions...")
545
  transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
 
546
  processed_clips = []
547
  for i in range(len(video_clips)):
548
  if i == 0:
 
557
  else:
558
  _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
559
  processed_clips.append(clip_with_transition)
 
560
  progress(0.7, desc="Concatenating...")
561
+ if generation_cancelled:
562
+ for c in processed_clips:
563
+ try:
564
+ c.close()
565
+ except:
566
+ pass
567
+ cleanup_resources()
568
+ return None, "Generation cancelled"
569
  if transition_type == "Snap Cut":
570
  final_video_only = concatenate_videoclips(processed_clips, method="compose")
571
  else:
572
  final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
 
573
  final_video_only = ensure_even_dimensions(final_video_only)
574
+ current_video_clip = final_video_only
575
 
576
+ # FIXED: Loop video if shorter than audio to prevent black screen
577
  if final_audio and final_video_only.duration < final_audio.duration:
578
  final_video_only = final_video_only.fx(vfx.loop, duration=final_audio.duration)
579
 
580
  progress(0.8, desc="Adding overlays...")
581
+ if generation_cancelled:
582
+ try:
583
+ final_video_only.close()
584
+ except:
585
+ pass
586
+ cleanup_resources()
587
+ return None, "Generation cancelled"
588
+ all_subtitle_clips = []
589
  if linelevel_subtitles:
590
+ for line in linelevel_subtitles:
591
+ if generation_cancelled:
592
+ try:
593
+ final_video_only.close()
594
+ except:
595
+ pass
596
+ cleanup_resources()
597
+ return None, "Generation cancelled"
598
+ try:
599
+ subtitle_fontsize = min(42, final_video_only.size[1] // 25)
600
+ all_subtitle_clips.extend(create_caption(line, final_video_only.size, font="Helvetica-Bold", fontsize=subtitle_fontsize, color='white'))
601
+ except Exception as e:
602
+ print(f"Subtitle error: {e}")
603
+ continue
604
+ all_clips = [final_video_only.set_opacity(0.65)]
605
+ if all_subtitle_clips:
606
+ all_clips.extend(all_subtitle_clips)
607
  if title_text and title_text.strip():
608
  title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
609
  all_clips.extend(title_clips)
 
610
  final_video = CompositeVideoClip(all_clips)
611
+ current_video_clip = final_video
612
  if final_audio:
613
  final_video = final_video.set_audio(final_audio)
 
614
  progress(0.9, desc="Exporting...")
615
+ if generation_cancelled:
616
+ try:
617
+ final_video.close()
618
+ except:
619
+ pass
620
+ cleanup_resources()
621
+ return None, "Generation cancelled"
622
  output_filename = f'video_{timestamp}.mp4'
623
  final_output_path = os.path.join(output_path, output_filename)
 
624
  try:
625
  final_video.write_videofile(
626
  final_output_path,
 
631
  bitrate=bitrate,
632
  audio_bitrate="128k",
633
  threads=8,
634
+ ffmpeg_params=["-crf", crf, "-pix_fmt", "yuv420p", "-movflags", "+faststart", "-tune", "fastdecode"]
635
  )
636
  except Exception as e:
637
+ if generation_cancelled:
638
+ return None, "Generation cancelled"
639
  return None, f"Export error: {str(e)}"
 
640
  progress(1.0, desc="Done")
641
+ if generation_cancelled:
642
+ try:
643
+ if os.path.exists(final_output_path):
644
+ os.remove(final_output_path)
645
+ except:
646
+ pass
647
+ cleanup_resources()
648
+ return None, "Generation cancelled"
649
  try:
650
  final_video.close()
651
  if voice_over_audio:
652
  voice_over_audio.close()
653
+ current_video_clip = None
654
  except:
655
  pass
656
+ audio_source = ""
657
+ if text_input and text_input.strip():
658
+ audio_source = f"TTS ({AVAILABLE_VOICES[voice_selection]['name'] if voice_selection in AVAILABLE_VOICES else 'Puck'})"
659
+ elif voice_over_path:
660
+ audio_source = "Uploaded Audio"
661
+ else:
662
+ audio_source = "Background Music"
663
+ summary = f"Complete\n{output_filename}\n{audio_source}\n{transition_type}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subtitles"
664
  return final_output_path, summary
665
 
666
+ # CHANGED: Removed share=True and debug=True for production
667
  with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
668
  gr.Markdown("# 🎬 AI Video Generator")
669
+ gr.Markdown("Upload video clips to `video_clips` folder and optionally background music to `background_music` folder.")
670
 
671
  with gr.Row():
672
  with gr.Column():
673
+ text_input = gr.Textbox(label="Text for TTS", lines=4, placeholder="Enter text to convert to speech...")
674
  voice_dropdown = gr.Dropdown(
675
  choices=[(f"{v['name']} - {v['description']}", k) for k, v in AVAILABLE_VOICES.items()],
676
  value="Puck",
677
+ label="Voice Selection"
678
  )
679
+ audio_input = gr.Audio(type="filepath", label="Or Upload Audio File")
680
+ title_input = gr.Textbox(label="Video Title (Optional)", lines=2, placeholder="Enter video title...")
681
+ duration_slider = gr.Slider(0.5, 10, 2, 0.5, label="Duration (minutes) - only used if no audio")
682
+ quality_radio = gr.Radio(["High", "Standard", "Preview"], value="High", label="Video Quality")
683
  transition_radio = gr.Radio(
684
  ["Smooth Blend", "Ken Burns Zoom", "Whip Pan", "Dreamy Fade", "Snap Cut"],
685
  value="Smooth Blend",
686
+ label="Transition Effect"
687
  )
688
  with gr.Row():
689
+ submit_btn = gr.Button("🎥 Generate Video", variant="primary", size="lg")
690
+ stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg")
691
 
692
  with gr.Column():
693
+ video_output = gr.Video(label="Generated Video")
694
  summary_output = gr.Textbox(label="Status", lines=8)
695
 
696
  submit_btn.click(
 
700
  )
701
  stop_btn.click(fn=cancel_generation, outputs=[summary_output, video_output])
702
 
703
+ # CHANGED: Updated launch settings for Hugging Face
704
if __name__ == "__main__":
    # Entry point when run as a script (e.g. on a Hugging Face Space).
    # Bind to all interfaces on the Space's standard port and surface
    # server-side errors in the UI instead of failing silently.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    interface.launch(**launch_options)