1.1

Sleeping

App Files Community

Shreevathsam committed on Sep 21, 2025

Commit

3e31f54

verified ·

1 Parent(s): 9b6da36

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -8

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ os.makedirs('background_music', exist_ok=True)
 os.makedirs('voice_over', exist_ok=True)
 os.makedirs('exports', exist_ok=True)
-# Get API key from environment variable (will be set in Hugging Face Space settings)
 GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
 if GOOGLE_API_KEY:
     os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
@@ -369,24 +369,30 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
     generation_cancelled = False
     current_video_clip = None
     progress(0, desc="Starting...")
     if generation_cancelled:
         return None, "Generation cancelled"
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    # Updated paths for Hugging Face
     source_path = 'video_clips'
     if not os.path.isdir(source_path):
         return None, "Video clips folder not found. Please upload video clips to the 'video_clips' folder."
     output_path = 'exports'
     os.makedirs(output_path, exist_ok=True)
     video_extensions = ('.mp4', '.avi', '.mkv', '.mov')
     all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
     if not all_files:
         return None, "No video files found in 'video_clips' folder"
     random.shuffle(all_files)
     if generation_cancelled:
         return None, "Generation cancelled"
     bg_music_path = None
     bg_music_folder_path = 'background_music'
     if os.path.isdir(bg_music_folder_path):
@@ -394,16 +400,20 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
         possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions) and not f.startswith('voiceover_')]
         if len(possible_files) >= 1:
             bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
     target_duration_seconds = 0
     voice_over_audio = None
     linelevel_subtitles = None
     voice_over_path = None
     if text_input and text_input.strip():
         progress(0.1, desc="Generating TTS...")
         voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
         tts_path, tts_message = generate_tts_audio(text_input, voice_name)
         if generation_cancelled:
             return None, "Generation cancelled"
         if tts_path:
             voice_over_folder_path = 'voice_over'
             os.makedirs(voice_over_folder_path, exist_ok=True)
@@ -422,14 +432,17 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
         saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
         shutil.copy2(audio_input, saved_voice_path)
         voice_over_path = saved_voice_path
     if voice_over_path:
         try:
             progress(0.2, desc="Processing audio...")
             if generation_cancelled:
                 return None, "Generation cancelled"
             voice_over_audio = AudioFileClip(voice_over_path)
             target_duration_seconds = voice_over_audio.duration
             linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
             if generation_cancelled:
                 voice_over_audio.close()
                 return None, "Generation cancelled"
@@ -439,39 +452,50 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
         if not bg_music_path:
             return None, "Need text/audio or background music"
         target_duration_seconds = duration_minutes * 60
     progress(0.3, desc="Preparing audio...")
     if generation_cancelled:
         if voice_over_audio:
             voice_over_audio.close()
         return None, "Generation cancelled"
     audio_tracks = []
     if voice_over_audio:
         audio_tracks.append(voice_over_audio)
     if bg_music_path:
         try:
             background_audio = AudioFileClip(bg_music_path)
-            background_audio = background_audio.fx(afx.volumex, 0.10)
             background_audio = background_audio.fx(afx.audio_loop, duration=target_duration_seconds)
             audio_tracks.append(background_audio)
         except Exception as e:
             print(f"Background music error: {e}")
     final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
     progress(0.4, desc="Setting up video...")
     if generation_cancelled:
         cleanup_resources()
         return None, "Generation cancelled"
     if video_quality == "High":
         target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
     elif video_quality == "Standard":
         target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
     else:
         target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
     progress(0.5, desc="Processing clips...")
     video_clips = []
     current_duration = 0
     file_index = 0
     safety_counter = 0
     max_iterations = len(all_files) * 3
     while current_duration < target_duration_seconds and safety_counter < max_iterations:
         if generation_cancelled:
             for clip in video_clips:
@@ -481,19 +505,24 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
                     pass
             cleanup_resources()
             return None, "Generation cancelled"
         if file_index >= len(all_files):
             file_index = 0
             random.shuffle(all_files)
         video_file = all_files[file_index]
         file_index += 1
         safety_counter += 1
         try:
             full_clip = VideoFileClip(os.path.join(source_path, video_file))
             current_video_clip = full_clip
             if generation_cancelled:
                 full_clip.close()
                 cleanup_resources()
                 return None, "Generation cancelled"
             if full_clip.h != target_height:
                 aspect_ratio = full_clip.w / full_clip.h
                 new_width = int(target_height * aspect_ratio)
@@ -503,16 +532,20 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
                 full_clip = full_clip.resize((new_width, adjusted_height))
             else:
                 full_clip = ensure_even_dimensions(full_clip)
             subclip = get_random_subclip_and_slow(full_clip)
             remaining_duration = target_duration_seconds - current_duration
             if subclip.duration > remaining_duration:
                 subclip = subclip.subclip(0, remaining_duration)
             video_clips.append(ensure_even_dimensions(subclip))
             current_duration += subclip.duration
             progress(0.5 + (safety_counter * 0.1 / max_iterations), desc=f"Clip {len(video_clips)}")
         except Exception as e:
             print(f"Error: {e}")
             continue
     if generation_cancelled:
         for clip in video_clips:
             try:
@@ -521,22 +554,30 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
                 pass
         cleanup_resources()
         return None, "Generation cancelled"
     if not video_clips:
         return None, "No clips processed"
     total_video_duration = sum(clip.duration for clip in video_clips)
     duration_diff = total_video_duration - target_duration_seconds
     if abs(duration_diff) > 0.1:
         if duration_diff > 0:
             trim_amount = duration_diff
-            new_last_clip = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
-            video_clips[-1] = new_last_clip
         else:
             extend_amount = abs(duration_diff)
             new_last_clip = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
             video_clips[-1] = new_last_clip
     progress(0.6, desc="Applying transitions...")
     transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
     processed_clips = []
     for i in range(len(video_clips)):
         if i == 0:
@@ -551,7 +592,9 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
         else:
             _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
             processed_clips.append(clip_with_transition)
     progress(0.7, desc="Concatenating...")
     if generation_cancelled:
         for c in processed_clips:
             try:
@@ -560,15 +603,21 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
                 pass
         cleanup_resources()
         return None, "Generation cancelled"
     if transition_type == "Snap Cut":
         final_video_only = concatenate_videoclips(processed_clips, method="compose")
     else:
         final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
     final_video_only = ensure_even_dimensions(final_video_only)
     current_video_clip = final_video_only
-    if final_audio:
-        final_video_only = final_video_only.set_duration(final_audio.duration)
     progress(0.8, desc="Adding overlays...")
     if generation_cancelled:
         try:
             final_video_only.close()
@@ -576,6 +625,7 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
             pass
         cleanup_resources()
         return None, "Generation cancelled"
     all_subtitle_clips = []
     if linelevel_subtitles:
         for line in linelevel_subtitles:
@@ -592,17 +642,22 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
             except Exception as e:
                 print(f"Subtitle error: {e}")
                 continue
     all_clips = [final_video_only.set_opacity(0.65)]
     if all_subtitle_clips:
         all_clips.extend(all_subtitle_clips)
     if title_text and title_text.strip():
         title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
         all_clips.extend(title_clips)
     final_video = CompositeVideoClip(all_clips)
     current_video_clip = final_video
     if final_audio:
         final_video = final_video.set_audio(final_audio)
     progress(0.9, desc="Exporting...")
     if generation_cancelled:
         try:
             final_video.close()
@@ -610,8 +665,10 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
             pass
         cleanup_resources()
         return None, "Generation cancelled"
     output_filename = f'video_{timestamp}.mp4'
     final_output_path = os.path.join(output_path, output_filename)
     try:
         final_video.write_videofile(
             final_output_path,
@@ -628,7 +685,9 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
         if generation_cancelled:
             return None, "Generation cancelled"
         return None, f"Export error: {str(e)}"
     progress(1.0, desc="Done")
     if generation_cancelled:
         try:
             if os.path.exists(final_output_path):
@@ -637,6 +696,7 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
             pass
         cleanup_resources()
         return None, "Generation cancelled"
     try:
         final_video.close()
         if voice_over_audio:
@@ -644,6 +704,7 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
         current_video_clip = None
     except:
         pass
     audio_source = ""
     if text_input and text_input.strip():
         audio_source = f"TTS ({AVAILABLE_VOICES[voice_selection]['name'] if voice_selection in AVAILABLE_VOICES else 'Puck'})"
@@ -651,9 +712,11 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
         audio_source = "Uploaded Audio"
     else:
         audio_source = "Background Music"
     summary = f"Complete\n{output_filename}\n{audio_source}\n{transition_type}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subtitles"
     return final_output_path, summary
 with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
     gr.Markdown("# 🎬 AI Video Generator")
     gr.Markdown("Upload video clips to `video_clips` folder and optionally background music to `background_music` folder.")
@@ -695,4 +758,5 @@ if __name__ == "__main__":
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True
-    )

 os.makedirs('voice_over', exist_ok=True)
 os.makedirs('exports', exist_ok=True)
+# Get API key from environment variable
 GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
 if GOOGLE_API_KEY:
     os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
     generation_cancelled = False
     current_video_clip = None
     progress(0, desc="Starting...")
     if generation_cancelled:
         return None, "Generation cancelled"
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     source_path = 'video_clips'
     if not os.path.isdir(source_path):
         return None, "Video clips folder not found. Please upload video clips to the 'video_clips' folder."
     output_path = 'exports'
     os.makedirs(output_path, exist_ok=True)
     video_extensions = ('.mp4', '.avi', '.mkv', '.mov')
     all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
     if not all_files:
         return None, "No video files found in 'video_clips' folder"
     random.shuffle(all_files)
     if generation_cancelled:
         return None, "Generation cancelled"
     bg_music_path = None
     bg_music_folder_path = 'background_music'
     if os.path.isdir(bg_music_folder_path):
         possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions) and not f.startswith('voiceover_')]
         if len(possible_files) >= 1:
             bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
     target_duration_seconds = 0
     voice_over_audio = None
     linelevel_subtitles = None
     voice_over_path = None
     if text_input and text_input.strip():
         progress(0.1, desc="Generating TTS...")
         voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
         tts_path, tts_message = generate_tts_audio(text_input, voice_name)
         if generation_cancelled:
             return None, "Generation cancelled"
         if tts_path:
             voice_over_folder_path = 'voice_over'
             os.makedirs(voice_over_folder_path, exist_ok=True)
         saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
         shutil.copy2(audio_input, saved_voice_path)
         voice_over_path = saved_voice_path
     if voice_over_path:
         try:
             progress(0.2, desc="Processing audio...")
             if generation_cancelled:
                 return None, "Generation cancelled"
             voice_over_audio = AudioFileClip(voice_over_path)
             target_duration_seconds = voice_over_audio.duration
             linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
             if generation_cancelled:
                 voice_over_audio.close()
                 return None, "Generation cancelled"
         if not bg_music_path:
             return None, "Need text/audio or background music"
         target_duration_seconds = duration_minutes * 60
     progress(0.3, desc="Preparing audio...")
     if generation_cancelled:
         if voice_over_audio:
             voice_over_audio.close()
         return None, "Generation cancelled"
     audio_tracks = []
     if voice_over_audio:
         audio_tracks.append(voice_over_audio)
     if bg_music_path:
         try:
             background_audio = AudioFileClip(bg_music_path)
+            background_audio = background_audio.fx(afx.volumex, 0.10)  # Increased from 0.015 to 0.10
             background_audio = background_audio.fx(afx.audio_loop, duration=target_duration_seconds)
             audio_tracks.append(background_audio)
         except Exception as e:
             print(f"Background music error: {e}")
     final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
     progress(0.4, desc="Setting up video...")
     if generation_cancelled:
         cleanup_resources()
         return None, "Generation cancelled"
     if video_quality == "High":
         target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
     elif video_quality == "Standard":
         target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
     else:
         target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
     progress(0.5, desc="Processing clips...")
     video_clips = []
     current_duration = 0
     file_index = 0
     safety_counter = 0
     max_iterations = len(all_files) * 3
     while current_duration < target_duration_seconds and safety_counter < max_iterations:
         if generation_cancelled:
             for clip in video_clips:
                     pass
             cleanup_resources()
             return None, "Generation cancelled"
         if file_index >= len(all_files):
             file_index = 0
             random.shuffle(all_files)
         video_file = all_files[file_index]
         file_index += 1
         safety_counter += 1
         try:
             full_clip = VideoFileClip(os.path.join(source_path, video_file))
             current_video_clip = full_clip
             if generation_cancelled:
                 full_clip.close()
                 cleanup_resources()
                 return None, "Generation cancelled"
             if full_clip.h != target_height:
                 aspect_ratio = full_clip.w / full_clip.h
                 new_width = int(target_height * aspect_ratio)
                 full_clip = full_clip.resize((new_width, adjusted_height))
             else:
                 full_clip = ensure_even_dimensions(full_clip)
             subclip = get_random_subclip_and_slow(full_clip)
             remaining_duration = target_duration_seconds - current_duration
             if subclip.duration > remaining_duration:
                 subclip = subclip.subclip(0, remaining_duration)
             video_clips.append(ensure_even_dimensions(subclip))
             current_duration += subclip.duration
             progress(0.5 + (safety_counter * 0.1 / max_iterations), desc=f"Clip {len(video_clips)}")
         except Exception as e:
             print(f"Error: {e}")
             continue
     if generation_cancelled:
         for clip in video_clips:
             try:
                 pass
         cleanup_resources()
         return None, "Generation cancelled"
     if not video_clips:
         return None, "No clips processed"
+    # Fix: Ensure video clips match audio duration exactly
     total_video_duration = sum(clip.duration for clip in video_clips)
     duration_diff = total_video_duration - target_duration_seconds
     if abs(duration_diff) > 0.1:
         if duration_diff > 0:
+            # Video is longer than audio - trim the end
             trim_amount = duration_diff
+            if video_clips[-1].duration > trim_amount:
+                new_last_clip = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
+                video_clips[-1] = new_last_clip
         else:
+            # Video is shorter than audio - loop the last clip to extend
             extend_amount = abs(duration_diff)
             new_last_clip = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
             video_clips[-1] = new_last_clip
     progress(0.6, desc="Applying transitions...")
     transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
     processed_clips = []
     for i in range(len(video_clips)):
         if i == 0:
         else:
             _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
             processed_clips.append(clip_with_transition)
     progress(0.7, desc="Concatenating...")
     if generation_cancelled:
         for c in processed_clips:
             try:
                 pass
         cleanup_resources()
         return None, "Generation cancelled"
     if transition_type == "Snap Cut":
         final_video_only = concatenate_videoclips(processed_clips, method="compose")
     else:
         final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
     final_video_only = ensure_even_dimensions(final_video_only)
     current_video_clip = final_video_only
+    # Fix: Loop video if shorter than audio to prevent black screen
+    if final_audio and final_video_only.duration < final_audio.duration:
+        final_video_only = final_video_only.fx(vfx.loop, duration=final_audio.duration)
     progress(0.8, desc="Adding overlays...")
     if generation_cancelled:
         try:
             final_video_only.close()
             pass
         cleanup_resources()
         return None, "Generation cancelled"
     all_subtitle_clips = []
     if linelevel_subtitles:
         for line in linelevel_subtitles:
             except Exception as e:
                 print(f"Subtitle error: {e}")
                 continue
     all_clips = [final_video_only.set_opacity(0.65)]
     if all_subtitle_clips:
         all_clips.extend(all_subtitle_clips)
     if title_text and title_text.strip():
         title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
         all_clips.extend(title_clips)
     final_video = CompositeVideoClip(all_clips)
     current_video_clip = final_video
     if final_audio:
         final_video = final_video.set_audio(final_audio)
     progress(0.9, desc="Exporting...")
     if generation_cancelled:
         try:
             final_video.close()
             pass
         cleanup_resources()
         return None, "Generation cancelled"
     output_filename = f'video_{timestamp}.mp4'
     final_output_path = os.path.join(output_path, output_filename)
     try:
         final_video.write_videofile(
             final_output_path,
         if generation_cancelled:
             return None, "Generation cancelled"
         return None, f"Export error: {str(e)}"
     progress(1.0, desc="Done")
     if generation_cancelled:
         try:
             if os.path.exists(final_output_path):
             pass
         cleanup_resources()
         return None, "Generation cancelled"
     try:
         final_video.close()
         if voice_over_audio:
         current_video_clip = None
     except:
         pass
     audio_source = ""
     if text_input and text_input.strip():
         audio_source = f"TTS ({AVAILABLE_VOICES[voice_selection]['name'] if voice_selection in AVAILABLE_VOICES else 'Puck'})"
         audio_source = "Uploaded Audio"
     else:
         audio_source = "Background Music"
     summary = f"Complete\n{output_filename}\n{audio_source}\n{transition_type}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subtitles"
     return final_output_path, summary
+# Gradio Interface
 with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
     gr.Markdown("# 🎬 AI Video Generator")
     gr.Markdown("Upload video clips to `video_clips` folder and optionally background music to `background_music` folder.")
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True
+    )
+        final_video_only