Shreevathsam commited on
Commit
ddc8837
·
verified ·
1 Parent(s): c534833

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +323 -157
app.py CHANGED
@@ -5,22 +5,25 @@ import whisper
5
  import shutil
6
  import wave
7
  import base64
8
- from moviepy.editor import (VideoFileClip, AudioFileClip, concatenate_videoclips,
9
- CompositeVideoClip, CompositeAudioClip, ImageClip)
10
  import moviepy.audio.fx.all as afx
11
  import moviepy.video.fx.all as vfx
12
  import gradio as gr
13
  from PIL import Image, ImageDraw, ImageFont
14
  import numpy as np
 
15
  import urllib.request
16
  from google import genai
17
  from google.genai import types
18
 
 
19
  os.makedirs('video_clips', exist_ok=True)
20
  os.makedirs('background_music', exist_ok=True)
21
  os.makedirs('voice_over', exist_ok=True)
22
  os.makedirs('exports', exist_ok=True)
23
 
 
24
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
25
  if GOOGLE_API_KEY:
26
  os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
@@ -74,7 +77,7 @@ def generate_tts_audio(text_input, voice_name="Puck"):
74
  return None, f"Error: {str(e)}"
75
 
76
  def split_text_into_lines(data):
77
- MaxChars, MaxDuration, MaxGap = 40, 2.5, 1.5
78
  subtitles, line, line_duration = [], [], 0
79
  for idx, word_data in enumerate(data):
80
  line.append(word_data)
@@ -101,100 +104,188 @@ def split_text_into_lines(data):
101
  })
102
  return subtitles
103
 
104
- def create_subtitle_image(text, frame_size, fontsize=42):
105
- """Create subtitle as PIL Image - more reliable than TextClip"""
106
- frame_width, frame_height = frame_size
107
-
108
- # Load font
109
- FONT_PATH = None
110
- try:
111
- FONT_URL = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
112
- FONT_PATH = "/tmp/Poppins-Bold.ttf"
113
- if not os.path.exists(FONT_PATH):
114
- urllib.request.urlretrieve(FONT_URL, FONT_PATH)
115
- font = ImageFont.truetype(FONT_PATH, fontsize)
116
- except:
117
- font = ImageFont.load_default()
118
-
119
- # Create transparent image
120
- img = Image.new('RGBA', (frame_width, frame_height), (0, 0, 0, 0))
121
- draw = ImageDraw.Draw(img)
122
-
123
- # Get text size
124
- bbox = draw.textbbox((0, 0), text.upper(), font=font)
125
- text_width = bbox[2] - bbox[0]
126
- text_height = bbox[3] - bbox[1]
127
-
128
- # Position at bottom center
129
- x = (frame_width - text_width) // 2
130
- y = int(frame_height * 0.75)
131
-
132
- # Draw background
133
- padding = 20
134
- bg_x1 = x - padding
135
- bg_y1 = y - padding
136
- bg_x2 = x + text_width + padding
137
- bg_y2 = y + text_height + padding
138
- draw.rounded_rectangle([bg_x1, bg_y1, bg_x2, bg_y2], radius=15, fill=(0, 0, 0, 180))
139
-
140
- # Draw text with shadow
141
- draw.text((x+2, y+2), text.upper(), font=font, fill=(0, 0, 0, 255))
142
- draw.text((x, y), text.upper(), font=font, fill=(255, 255, 255, 255))
143
-
144
- return np.array(img)
145
-
146
- def create_simple_subtitles(subtitle_data, frame_size, total_duration):
147
- """Create simple, reliable subtitles using ImageClips"""
148
- subtitle_clips = []
149
-
150
- for item in subtitle_data:
151
- text = item['word']
152
- start_time = item['start']
153
- end_time = item['end']
154
- duration = end_time - start_time
155
-
156
- # Create subtitle image
157
- img_array = create_subtitle_image(text, frame_size)
158
-
159
- # Create ImageClip
160
- clip = ImageClip(img_array, duration=duration)
161
- clip = clip.set_start(start_time)
162
-
163
- subtitle_clips.append(clip)
164
-
165
- return subtitle_clips
166
 
167
  def create_title_overlay(title_text, framesize, duration=4):
168
  if not title_text or not title_text.strip():
169
  return []
170
  frame_width, frame_height = framesize
171
-
172
- try:
173
- FONT_URL = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
174
- FONT_PATH = "/tmp/Poppins-Bold.ttf"
175
- if not os.path.exists(FONT_PATH):
176
  urllib.request.urlretrieve(FONT_URL, FONT_PATH)
177
- font = ImageFont.truetype(FONT_PATH, int(frame_height * 0.06))
178
- except:
179
- font = ImageFont.load_default()
180
-
 
 
 
 
 
 
 
 
 
 
181
  base = Image.new("RGBA", (frame_width, frame_height), (0, 0, 0, 0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  draw = ImageDraw.Draw(base)
183
-
184
- # Simple centered title
185
- text = title_text.upper()
186
- bbox = draw.textbbox((0, 0), text, font=font)
187
- text_width = bbox[2] - bbox[0]
188
-
189
- x = (frame_width - text_width) // 2
190
- y = int(frame_height * 0.1)
191
-
192
- # Shadow and text
193
- draw.text((x+3, y+3), text, font=font, fill=(0, 0, 0, 200))
194
- draw.text((x, y), text, font=font, fill=(255, 255, 255, 255))
195
-
196
  return [ImageClip(np.array(base), duration=duration)]
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  def get_random_subclip_and_slow(clip):
199
  subclip_durations = [2, 3, 4]
200
  subclip_duration = random.choice(subclip_durations)
@@ -278,12 +369,14 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
278
  generation_cancelled = False
279
  current_video_clip = None
280
  progress(0, desc="Starting...")
281
-
 
282
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 
 
283
  source_path = 'video_clips'
284
  if not os.path.isdir(source_path):
285
  return None, "Video clips folder not found"
286
-
287
  output_path = 'exports'
288
  os.makedirs(output_path, exist_ok=True)
289
 
@@ -291,25 +384,30 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
291
  all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
292
  if not all_files:
293
  return None, "No video files found"
294
-
295
  random.shuffle(all_files)
296
-
 
297
  bg_music_path = None
 
 
298
  bg_music_folder_path = 'background_music'
299
  if os.path.isdir(bg_music_folder_path):
300
- audio_extensions = ('.mp3', '.wav', '.aac')
301
- possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions)]
302
- if possible_files:
303
  bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
304
-
305
- voice_over_path = None
306
  linelevel_subtitles = None
307
-
308
  if text_input and text_input.strip():
309
  progress(0.1, desc="Generating TTS...")
310
  voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
311
  tts_path, tts_message = generate_tts_audio(text_input, voice_name)
 
 
312
  if tts_path:
 
313
  voice_over_folder_path = 'voice_over'
314
  os.makedirs(voice_over_folder_path, exist_ok=True)
315
  voice_filename = f"tts_voiceover_{timestamp}.wav"
@@ -319,70 +417,87 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
319
  else:
320
  return None, f"TTS failed: {tts_message}"
321
  elif audio_input:
 
 
322
  voice_over_folder_path = 'voice_over'
323
  os.makedirs(voice_over_folder_path, exist_ok=True)
324
  voice_filename = f"uploaded_voiceover_{timestamp}.mp3"
325
  saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
326
  shutil.copy2(audio_input, saved_voice_path)
327
  voice_over_path = saved_voice_path
328
-
329
  if voice_over_path:
330
  try:
331
  progress(0.2, desc="Processing audio...")
 
 
332
  voice_over_audio = AudioFileClip(voice_over_path)
333
  target_duration_seconds = voice_over_audio.duration
334
  linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
 
 
 
335
  except Exception as e:
336
  return None, f"Audio error: {str(e)}"
337
  else:
338
  if not bg_music_path:
339
  return None, "Need text/audio or background music"
340
  target_duration_seconds = duration_minutes * 60
341
- voice_over_audio = None
342
-
343
  progress(0.3, desc="Preparing audio...")
344
-
 
 
 
345
  audio_tracks = []
346
  if voice_over_audio:
347
  audio_tracks.append(voice_over_audio)
348
-
349
  if bg_music_path:
350
  try:
351
  background_audio = AudioFileClip(bg_music_path)
 
352
  background_audio = background_audio.fx(afx.volumex, 0.10)
353
  background_audio = background_audio.fx(afx.audio_loop, duration=target_duration_seconds)
354
  audio_tracks.append(background_audio)
355
  except Exception as e:
356
  print(f"Background music error: {e}")
357
-
358
  final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
359
-
360
  progress(0.4, desc="Setting up video...")
361
-
 
 
362
  if video_quality == "High":
363
  target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
364
  elif video_quality == "Standard":
365
  target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
366
  else:
367
  target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
368
-
369
  progress(0.5, desc="Processing clips...")
370
-
371
  video_clips = []
372
  current_duration = 0
373
  file_index = 0
374
-
375
- while current_duration < target_duration_seconds:
 
 
 
 
 
 
 
 
 
376
  if file_index >= len(all_files):
377
  file_index = 0
378
  random.shuffle(all_files)
379
-
380
  video_file = all_files[file_index]
381
  file_index += 1
382
-
383
  try:
384
  full_clip = VideoFileClip(os.path.join(source_path, video_file))
385
-
 
 
 
 
386
  if full_clip.h != target_height:
387
  aspect_ratio = full_clip.w / full_clip.h
388
  new_width = int(target_height * aspect_ratio)
@@ -392,37 +507,42 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
392
  full_clip = full_clip.resize((new_width, adjusted_height))
393
  else:
394
  full_clip = ensure_even_dimensions(full_clip)
395
-
396
  subclip = get_random_subclip_and_slow(full_clip)
397
  remaining_duration = target_duration_seconds - current_duration
398
-
399
  if subclip.duration > remaining_duration:
400
  subclip = subclip.subclip(0, remaining_duration)
401
-
402
  video_clips.append(ensure_even_dimensions(subclip))
403
  current_duration += subclip.duration
 
404
  except Exception as e:
405
- print(f"Error processing {video_file}: {e}")
406
  continue
407
-
 
 
 
 
 
 
 
408
  if not video_clips:
409
  return None, "No clips processed"
410
 
411
- # Ensure exact duration match
412
  total_video_duration = sum(clip.duration for clip in video_clips)
413
  duration_diff = total_video_duration - target_duration_seconds
414
-
415
  if abs(duration_diff) > 0.1:
416
  if duration_diff > 0:
417
  trim_amount = duration_diff
418
- video_clips[-1] = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
 
419
  else:
420
  extend_amount = abs(duration_diff)
421
- video_clips[-1] = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
 
422
 
423
  progress(0.6, desc="Applying transitions...")
424
  transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
425
-
426
  processed_clips = []
427
  for i in range(len(video_clips)):
428
  if i == 0:
@@ -437,45 +557,70 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
437
  else:
438
  _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
439
  processed_clips.append(clip_with_transition)
440
-
441
  progress(0.7, desc="Concatenating...")
442
-
 
 
 
 
 
 
 
443
  if transition_type == "Snap Cut":
444
  final_video_only = concatenate_videoclips(processed_clips, method="compose")
445
  else:
446
  final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
447
-
448
  final_video_only = ensure_even_dimensions(final_video_only)
 
449
 
450
- # Fix black screen - loop if needed
451
  if final_audio and final_video_only.duration < final_audio.duration:
452
  final_video_only = final_video_only.fx(vfx.loop, duration=final_audio.duration)
453
 
454
  progress(0.8, desc="Adding overlays...")
455
-
456
- # Create subtitle clips using reliable method
457
- all_clips = [final_video_only.set_opacity(0.65)]
458
-
 
 
 
 
459
  if linelevel_subtitles:
460
- print(f"Creating {len(linelevel_subtitles)} subtitle sections")
461
- subtitle_clips = create_simple_subtitles(linelevel_subtitles, final_video_only.size, final_video_only.duration)
462
- all_clips.extend(subtitle_clips)
463
- print(f"Added {len(subtitle_clips)} subtitle clips")
464
-
 
 
 
 
 
 
 
 
 
 
 
 
465
  if title_text and title_text.strip():
466
  title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
467
  all_clips.extend(title_clips)
468
-
469
  final_video = CompositeVideoClip(all_clips)
470
-
471
  if final_audio:
472
  final_video = final_video.set_audio(final_audio)
473
-
474
  progress(0.9, desc="Exporting...")
475
-
 
 
 
 
 
 
476
  output_filename = f'video_{timestamp}.mp4'
477
  final_output_path = os.path.join(output_path, output_filename)
478
-
479
  try:
480
  final_video.write_videofile(
481
  final_output_path,
@@ -486,50 +631,66 @@ def merge_videos_with_subtitles(text_input, voice_selection, audio_input, title_
486
  bitrate=bitrate,
487
  audio_bitrate="128k",
488
  threads=8,
489
- ffmpeg_params=["-crf", crf, "-pix_fmt", "yuv420p", "-movflags", "+faststart"]
490
  )
491
  except Exception as e:
 
 
492
  return None, f"Export error: {str(e)}"
493
-
494
  progress(1.0, desc="Done")
495
-
 
 
 
 
 
 
 
496
  try:
497
  final_video.close()
498
  if voice_over_audio:
499
  voice_over_audio.close()
 
500
  except:
501
  pass
502
-
503
- audio_source = "TTS" if text_input else ("Uploaded" if audio_input else "BGM")
504
- summary = f"Complete\n{output_filename}\n{audio_source}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subs"
 
 
 
 
 
505
  return final_output_path, summary
506
 
 
507
  with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
508
  gr.Markdown("# 🎬 AI Video Generator")
 
509
 
510
  with gr.Row():
511
  with gr.Column():
512
- text_input = gr.Textbox(label="Text for TTS", lines=4)
513
  voice_dropdown = gr.Dropdown(
514
  choices=[(f"{v['name']} - {v['description']}", k) for k, v in AVAILABLE_VOICES.items()],
515
  value="Puck",
516
- label="Voice"
517
  )
518
- audio_input = gr.Audio(type="filepath", label="Or Upload Audio")
519
- title_input = gr.Textbox(label="Title (Optional)", lines=2)
520
- duration_slider = gr.Slider(0.5, 10, 2, 0.5, label="Duration (min)")
521
- quality_radio = gr.Radio(["High", "Standard", "Preview"], value="High", label="Quality")
522
  transition_radio = gr.Radio(
523
  ["Smooth Blend", "Ken Burns Zoom", "Whip Pan", "Dreamy Fade", "Snap Cut"],
524
  value="Smooth Blend",
525
- label="Transition"
526
  )
527
  with gr.Row():
528
- submit_btn = gr.Button("Generate Video", variant="primary")
529
- stop_btn = gr.Button("Stop", variant="stop")
530
 
531
  with gr.Column():
532
- video_output = gr.Video(label="Output")
533
  summary_output = gr.Textbox(label="Status", lines=8)
534
 
535
  submit_btn.click(
@@ -539,5 +700,10 @@ with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
539
  )
540
  stop_btn.click(fn=cancel_generation, outputs=[summary_output, video_output])
541
 
 
542
  if __name__ == "__main__":
543
- interface.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
 
 
 
 
5
  import shutil
6
  import wave
7
  import base64
8
+ from moviepy.editor import (VideoFileClip, AudioFileClip, TextClip,
9
+ concatenate_videoclips, CompositeVideoClip, CompositeAudioClip, ImageClip)
10
  import moviepy.audio.fx.all as afx
11
  import moviepy.video.fx.all as vfx
12
  import gradio as gr
13
  from PIL import Image, ImageDraw, ImageFont
14
  import numpy as np
15
+ from functools import lru_cache
16
  import urllib.request
17
  from google import genai
18
  from google.genai import types
19
 
20
+ # CHANGED: Create local directories instead of Google Drive paths
21
  os.makedirs('video_clips', exist_ok=True)
22
  os.makedirs('background_music', exist_ok=True)
23
  os.makedirs('voice_over', exist_ok=True)
24
  os.makedirs('exports', exist_ok=True)
25
 
26
+ # CHANGED: Get API key from environment variable (secure method)
27
  GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY', '')
28
  if GOOGLE_API_KEY:
29
  os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
 
77
  return None, f"Error: {str(e)}"
78
 
79
  def split_text_into_lines(data):
80
+ MaxChars, MaxDuration, MaxGap = 60, 2.5, 1.5
81
  subtitles, line, line_duration = [], [], 0
82
  for idx, word_data in enumerate(data):
83
  line.append(word_data)
 
104
  })
105
  return subtitles
106
 
107
+ @lru_cache(maxsize=1000)
108
+ def get_cached_text_clip(text, font, fontsize, color):
109
+ return TextClip(text, font=font, fontsize=fontsize, color=color)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  def create_title_overlay(title_text, framesize, duration=4):
112
  if not title_text or not title_text.strip():
113
  return []
114
  frame_width, frame_height = framesize
115
+ FONT_URL = "https://github.com/google/fonts/raw/main/ofl/poppins/Poppins-Bold.ttf"
116
+ FONT_PATH = "/tmp/Poppins-Bold.ttf"
117
+ if not os.path.exists(FONT_PATH):
118
+ try:
 
119
  urllib.request.urlretrieve(FONT_URL, FONT_PATH)
120
+ except:
121
+ FONT_PATH = None
122
+ TOP_MARGIN = int(frame_height * 0.115)
123
+ FONT_SIZE = int(frame_height * 0.042)
124
+ STROKE_WIDTH = max(1, int(frame_height * 0.003))
125
+ LINE_SPACING = max(4, int(frame_height * 0.008))
126
+ def load_font(size):
127
+ try:
128
+ if FONT_PATH and os.path.exists(FONT_PATH):
129
+ return ImageFont.truetype(FONT_PATH, size)
130
+ return ImageFont.truetype("/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", size)
131
+ except:
132
+ return ImageFont.load_default()
133
+ font_obj = load_font(FONT_SIZE)
134
  base = Image.new("RGBA", (frame_width, frame_height), (0, 0, 0, 0))
135
+ temp_img = Image.new("RGBA", (frame_width, frame_height), (0,0,0,0))
136
+ temp_draw = ImageDraw.Draw(temp_img)
137
+ def measure_text(text, font):
138
+ try:
139
+ bbox = temp_draw.textbbox((0,0), text, font=font, stroke_width=STROKE_WIDTH)
140
+ return bbox[2]-bbox[0], bbox[3]-bbox[1]
141
+ except:
142
+ return 100, 50
143
+ def wrap_text(text, font, max_width):
144
+ words = text.upper().split()
145
+ lines, current = [], []
146
+ for word in words:
147
+ test_line = " ".join(current + [word])
148
+ w, _ = measure_text(test_line, font)
149
+ if w <= max_width:
150
+ current.append(word)
151
+ else:
152
+ if current:
153
+ lines.append(" ".join(current))
154
+ current = [word]
155
+ else:
156
+ lines.append(word)
157
+ current = []
158
+ if current:
159
+ lines.append(" ".join(current))
160
+ return lines[:4]
161
+ lines = wrap_text(title_text, font_obj, frame_width * 0.90)
162
+ line_heights = [measure_text(line, font_obj)[1] for line in lines]
163
+ y_start = TOP_MARGIN
164
+ x_center = frame_width // 2
165
  draw = ImageDraw.Draw(base)
166
+ y = y_start
167
+ for i, line in enumerate(lines):
168
+ w, h = measure_text(line, font_obj)
169
+ x = x_center - w // 2
170
+ draw.text((x+2, y+2), line, font=font_obj, fill=(0,0,0,180))
171
+ draw.text((x, y), line, font=font_obj, fill=(255,255,255,255), stroke_width=STROKE_WIDTH, stroke_fill=(0,0,0,255))
172
+ y += line_heights[i] + LINE_SPACING
 
 
 
 
 
 
173
  return [ImageClip(np.array(base), duration=duration)]
174
 
175
+ def create_caption(textJSON, framesize, font="Helvetica-Bold", fontsize=14, color='white'):
176
+ full_duration = textJSON['end'] - textJSON['start']
177
+ word_clips = []
178
+ xy_textclips_positions = []
179
+ frame_width, frame_height = framesize
180
+ max_line_width = frame_width * 0.8
181
+ lines, current_line, current_line_width = [], [], 0
182
+ for wordJSON in textJSON['textcontents']:
183
+ word_upper = wordJSON['word'].upper()
184
+ temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
185
+ temp_space = get_cached_text_clip(" ", font, fontsize, color)
186
+ word_width, word_height = temp_word.size
187
+ space_width, _ = temp_space.size
188
+ if current_line_width + word_width + space_width > max_line_width and current_line:
189
+ lines.append({'words': current_line.copy(), 'width': current_line_width, 'height': word_height})
190
+ current_line = [wordJSON]
191
+ current_line_width = word_width + space_width
192
+ else:
193
+ current_line.append(wordJSON)
194
+ current_line_width += word_width + space_width
195
+ if current_line:
196
+ word_upper = current_line[0]['word'].upper()
197
+ temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
198
+ _, word_height = temp_word.size
199
+ lines.append({'words': current_line, 'width': current_line_width, 'height': word_height})
200
+ total_text_height = sum(line['height'] for line in lines) + (len(lines) - 1) * 3
201
+ subtitle_y_position = int(frame_height * 0.65)
202
+ current_y = subtitle_y_position
203
+ if lines:
204
+ shadow_padding = 25
205
+ shadow_height_extra = 15
206
+ total_subtitle_width = max(line['width'] for line in lines)
207
+ bg_width = int(total_subtitle_width + shadow_padding * 2)
208
+ bg_height = int(total_text_height + shadow_height_extra * 2)
209
+ img = Image.new('RGBA', (bg_width, bg_height), (0, 0, 0, 0))
210
+ draw = ImageDraw.Draw(img)
211
+ draw.rounded_rectangle([(0, 0), (bg_width-1, bg_height-1)], radius=15, fill=(0, 0, 0, 128))
212
+ img_array = np.array(img)
213
+ shadow_bg = ImageClip(img_array, duration=full_duration).set_start(textJSON['start'])
214
+ shadow_x = (frame_width - total_subtitle_width) / 2 - shadow_padding
215
+ shadow_y = subtitle_y_position - shadow_height_extra
216
+ shadow_bg = shadow_bg.set_position((shadow_x, shadow_y))
217
+ word_clips.append(shadow_bg)
218
+ for line in lines:
219
+ line_words = line['words']
220
+ word_dimensions = []
221
+ for wordJSON in line_words:
222
+ word_upper = wordJSON['word'].upper()
223
+ temp_word = get_cached_text_clip(word_upper, font, fontsize, color)
224
+ temp_space = get_cached_text_clip(" ", font, fontsize, color)
225
+ word_width, word_height = temp_word.size
226
+ space_width, _ = temp_space.size
227
+ word_dimensions.append({
228
+ 'word_data': wordJSON,
229
+ 'word_width': word_width,
230
+ 'word_height': word_height,
231
+ 'space_width': space_width,
232
+ 'word_upper': word_upper
233
+ })
234
+ line_start_x = (frame_width - line['width']) / 2
235
+ current_x = line_start_x
236
+ for word_dim in word_dimensions:
237
+ wordJSON = word_dim['word_data']
238
+ word_width = word_dim['word_width']
239
+ word_height = word_dim['word_height']
240
+ space_width = word_dim['space_width']
241
+ word_upper = word_dim['word_upper']
242
+ shadow_text = get_cached_text_clip(word_upper, font, fontsize, 'black')
243
+ shadow_text = shadow_text.set_start(textJSON['start']).set_duration(full_duration)
244
+ shadow_text = shadow_text.set_position((current_x + 1, current_y + 1)).set_opacity(0.3)
245
+ word_clips.append(shadow_text)
246
+ word_clip = get_cached_text_clip(word_upper, font, fontsize, color)
247
+ word_clip = word_clip.set_start(textJSON['start']).set_duration(full_duration)
248
+ word_clip = word_clip.set_position((current_x, current_y))
249
+ space_clip = get_cached_text_clip(" ", font, fontsize, color)
250
+ space_clip = space_clip.set_start(textJSON['start']).set_duration(full_duration)
251
+ space_clip = space_clip.set_position((current_x + word_width, current_y))
252
+ xy_textclips_positions.append({
253
+ "x_pos": current_x,
254
+ "y_pos": current_y,
255
+ "width": word_width,
256
+ "height": word_height,
257
+ "word": word_upper,
258
+ "start": wordJSON['start'],
259
+ "end": wordJSON['end'],
260
+ "duration": wordJSON['end'] - wordJSON['start']
261
+ })
262
+ word_clips.append(word_clip)
263
+ word_clips.append(space_clip)
264
+ current_x += word_width + space_width
265
+ current_y += line['height'] + 3
266
+ for highlight_word in xy_textclips_positions:
267
+ bg_width = int(highlight_word['width'] + 16)
268
+ bg_height = int(highlight_word['height'] + 8)
269
+ img = Image.new('RGBA', (bg_width, bg_height), (0, 0, 0, 0))
270
+ draw = ImageDraw.Draw(img)
271
+ draw.rounded_rectangle([(0, 0), (bg_width-1, bg_height-1)], radius=8, fill=(147, 0, 211, 180))
272
+ img_array = np.array(img)
273
+ bg_clip = ImageClip(img_array, duration=highlight_word['duration'])
274
+ bg_clip = bg_clip.set_start(highlight_word['start'])
275
+ bg_x = highlight_word['x_pos'] - 8
276
+ bg_y = highlight_word['y_pos'] - 4
277
+ bg_clip = bg_clip.set_position((bg_x, bg_y))
278
+ shadow_highlight = get_cached_text_clip(highlight_word['word'], font, fontsize, 'black')
279
+ shadow_highlight = shadow_highlight.set_start(highlight_word['start']).set_duration(highlight_word['duration'])
280
+ shadow_highlight = shadow_highlight.set_position((highlight_word['x_pos'] + 1, highlight_word['y_pos'] + 1)).set_opacity(0.4)
281
+ word_clip_highlight = get_cached_text_clip(highlight_word['word'], font, fontsize, 'white')
282
+ word_clip_highlight = word_clip_highlight.set_start(highlight_word['start']).set_duration(highlight_word['duration'])
283
+ word_clip_highlight = word_clip_highlight.set_position((highlight_word['x_pos'], highlight_word['y_pos']))
284
+ word_clips.append(bg_clip)
285
+ word_clips.append(shadow_highlight)
286
+ word_clips.append(word_clip_highlight)
287
+ return word_clips
288
+
289
  def get_random_subclip_and_slow(clip):
290
  subclip_durations = [2, 3, 4]
291
  subclip_duration = random.choice(subclip_durations)
 
369
  generation_cancelled = False
370
  current_video_clip = None
371
  progress(0, desc="Starting...")
372
+ if generation_cancelled:
373
+ return None, "Generation cancelled"
374
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
375
+
376
+ # CHANGED: Use local paths instead of Google Drive
377
  source_path = 'video_clips'
378
  if not os.path.isdir(source_path):
379
  return None, "Video clips folder not found"
 
380
  output_path = 'exports'
381
  os.makedirs(output_path, exist_ok=True)
382
 
 
384
  all_files = [f for f in os.listdir(source_path) if f.lower().endswith(video_extensions)]
385
  if not all_files:
386
  return None, "No video files found"
 
387
  random.shuffle(all_files)
388
+ if generation_cancelled:
389
+ return None, "Generation cancelled"
390
  bg_music_path = None
391
+
392
+ # CHANGED: Use local background_music folder
393
  bg_music_folder_path = 'background_music'
394
  if os.path.isdir(bg_music_folder_path):
395
+ audio_extensions = ('.mp3', '.wav', '.m4a', '.aac')
396
+ possible_files = [f for f in os.listdir(bg_music_folder_path) if f.lower().endswith(audio_extensions) and not f.startswith('voiceover_')]
397
+ if len(possible_files) >= 1:
398
  bg_music_path = os.path.join(bg_music_folder_path, possible_files[0])
399
+ target_duration_seconds = 0
400
+ voice_over_audio = None
401
  linelevel_subtitles = None
402
+ voice_over_path = None
403
  if text_input and text_input.strip():
404
  progress(0.1, desc="Generating TTS...")
405
  voice_name = AVAILABLE_VOICES[voice_selection]["name"] if voice_selection in AVAILABLE_VOICES else "Puck"
406
  tts_path, tts_message = generate_tts_audio(text_input, voice_name)
407
+ if generation_cancelled:
408
+ return None, "Generation cancelled"
409
  if tts_path:
410
+ # CHANGED: Use local voice_over folder
411
  voice_over_folder_path = 'voice_over'
412
  os.makedirs(voice_over_folder_path, exist_ok=True)
413
  voice_filename = f"tts_voiceover_{timestamp}.wav"
 
417
  else:
418
  return None, f"TTS failed: {tts_message}"
419
  elif audio_input:
420
+ if generation_cancelled:
421
+ return None, "Generation cancelled"
422
  voice_over_folder_path = 'voice_over'
423
  os.makedirs(voice_over_folder_path, exist_ok=True)
424
  voice_filename = f"uploaded_voiceover_{timestamp}.mp3"
425
  saved_voice_path = os.path.join(voice_over_folder_path, voice_filename)
426
  shutil.copy2(audio_input, saved_voice_path)
427
  voice_over_path = saved_voice_path
 
428
  if voice_over_path:
429
  try:
430
  progress(0.2, desc="Processing audio...")
431
+ if generation_cancelled:
432
+ return None, "Generation cancelled"
433
  voice_over_audio = AudioFileClip(voice_over_path)
434
  target_duration_seconds = voice_over_audio.duration
435
  linelevel_subtitles, _ = process_voiceover_to_subtitles(voice_over_path)
436
+ if generation_cancelled:
437
+ voice_over_audio.close()
438
+ return None, "Generation cancelled"
439
  except Exception as e:
440
  return None, f"Audio error: {str(e)}"
441
  else:
442
  if not bg_music_path:
443
  return None, "Need text/audio or background music"
444
  target_duration_seconds = duration_minutes * 60
 
 
445
  progress(0.3, desc="Preparing audio...")
446
+ if generation_cancelled:
447
+ if voice_over_audio:
448
+ voice_over_audio.close()
449
+ return None, "Generation cancelled"
450
  audio_tracks = []
451
  if voice_over_audio:
452
  audio_tracks.append(voice_over_audio)
 
453
  if bg_music_path:
454
  try:
455
  background_audio = AudioFileClip(bg_music_path)
456
+ # CHANGED: Increased volume from 0.015 to 0.10 (louder background music)
457
  background_audio = background_audio.fx(afx.volumex, 0.10)
458
  background_audio = background_audio.fx(afx.audio_loop, duration=target_duration_seconds)
459
  audio_tracks.append(background_audio)
460
  except Exception as e:
461
  print(f"Background music error: {e}")
 
462
  final_audio = CompositeAudioClip(audio_tracks) if len(audio_tracks) > 1 else (audio_tracks[0] if audio_tracks else None)
 
463
  progress(0.4, desc="Setting up video...")
464
+ if generation_cancelled:
465
+ cleanup_resources()
466
+ return None, "Generation cancelled"
467
  if video_quality == "High":
468
  target_height, bitrate, preset, crf = 1080, "8000k", "veryfast", "20"
469
  elif video_quality == "Standard":
470
  target_height, bitrate, preset, crf = 720, "4000k", "veryfast", "24"
471
  else:
472
  target_height, bitrate, preset, crf = 480, "1000k", "ultrafast", "28"
 
473
  progress(0.5, desc="Processing clips...")
 
474
  video_clips = []
475
  current_duration = 0
476
  file_index = 0
477
+ safety_counter = 0
478
+ max_iterations = len(all_files) * 3
479
+ while current_duration < target_duration_seconds and safety_counter < max_iterations:
480
+ if generation_cancelled:
481
+ for clip in video_clips:
482
+ try:
483
+ clip.close()
484
+ except:
485
+ pass
486
+ cleanup_resources()
487
+ return None, "Generation cancelled"
488
  if file_index >= len(all_files):
489
  file_index = 0
490
  random.shuffle(all_files)
 
491
  video_file = all_files[file_index]
492
  file_index += 1
493
+ safety_counter += 1
494
  try:
495
  full_clip = VideoFileClip(os.path.join(source_path, video_file))
496
+ current_video_clip = full_clip
497
+ if generation_cancelled:
498
+ full_clip.close()
499
+ cleanup_resources()
500
+ return None, "Generation cancelled"
501
  if full_clip.h != target_height:
502
  aspect_ratio = full_clip.w / full_clip.h
503
  new_width = int(target_height * aspect_ratio)
 
507
  full_clip = full_clip.resize((new_width, adjusted_height))
508
  else:
509
  full_clip = ensure_even_dimensions(full_clip)
 
510
  subclip = get_random_subclip_and_slow(full_clip)
511
  remaining_duration = target_duration_seconds - current_duration
 
512
  if subclip.duration > remaining_duration:
513
  subclip = subclip.subclip(0, remaining_duration)
 
514
  video_clips.append(ensure_even_dimensions(subclip))
515
  current_duration += subclip.duration
516
+ progress(0.5 + (safety_counter * 0.1 / max_iterations), desc=f"Clip {len(video_clips)}")
517
  except Exception as e:
518
+ print(f"Error: {e}")
519
  continue
520
+ if generation_cancelled:
521
+ for clip in video_clips:
522
+ try:
523
+ clip.close()
524
+ except:
525
+ pass
526
+ cleanup_resources()
527
+ return None, "Generation cancelled"
528
  if not video_clips:
529
  return None, "No clips processed"
530
 
531
+ # FIXED: Ensure exact duration match to prevent black screens
532
  total_video_duration = sum(clip.duration for clip in video_clips)
533
  duration_diff = total_video_duration - target_duration_seconds
 
534
  if abs(duration_diff) > 0.1:
535
  if duration_diff > 0:
536
  trim_amount = duration_diff
537
+ new_last_clip = video_clips[-1].subclip(0, video_clips[-1].duration - trim_amount)
538
+ video_clips[-1] = new_last_clip
539
  else:
540
  extend_amount = abs(duration_diff)
541
+ new_last_clip = video_clips[-1].fx(vfx.loop, duration=video_clips[-1].duration + extend_amount)
542
+ video_clips[-1] = new_last_clip
543
 
544
  progress(0.6, desc="Applying transitions...")
545
  transition_duration = {"Snap Cut": 0.1, "Whip Pan": 0.3, "Dreamy Fade": 0.8, "Smooth Blend": 0.5, "Ken Burns Zoom": 0.5}.get(transition_type, 0.5)
 
546
  processed_clips = []
547
  for i in range(len(video_clips)):
548
  if i == 0:
 
557
  else:
558
  _, clip_with_transition = apply_transition_effect(video_clips[i-1], video_clips[i], transition_type, transition_duration)
559
  processed_clips.append(clip_with_transition)
 
560
  progress(0.7, desc="Concatenating...")
561
+ if generation_cancelled:
562
+ for c in processed_clips:
563
+ try:
564
+ c.close()
565
+ except:
566
+ pass
567
+ cleanup_resources()
568
+ return None, "Generation cancelled"
569
  if transition_type == "Snap Cut":
570
  final_video_only = concatenate_videoclips(processed_clips, method="compose")
571
  else:
572
  final_video_only = concatenate_videoclips(processed_clips, method="compose", padding=-transition_duration)
 
573
  final_video_only = ensure_even_dimensions(final_video_only)
574
+ current_video_clip = final_video_only
575
 
576
+ # FIXED: Loop video if shorter than audio to prevent black screen
577
  if final_audio and final_video_only.duration < final_audio.duration:
578
  final_video_only = final_video_only.fx(vfx.loop, duration=final_audio.duration)
579
 
580
  progress(0.8, desc="Adding overlays...")
581
+ if generation_cancelled:
582
+ try:
583
+ final_video_only.close()
584
+ except:
585
+ pass
586
+ cleanup_resources()
587
+ return None, "Generation cancelled"
588
+ all_subtitle_clips = []
589
  if linelevel_subtitles:
590
+ for line in linelevel_subtitles:
591
+ if generation_cancelled:
592
+ try:
593
+ final_video_only.close()
594
+ except:
595
+ pass
596
+ cleanup_resources()
597
+ return None, "Generation cancelled"
598
+ try:
599
+ subtitle_fontsize = min(42, final_video_only.size[1] // 25)
600
+ all_subtitle_clips.extend(create_caption(line, final_video_only.size, font="Helvetica-Bold", fontsize=subtitle_fontsize, color='white'))
601
+ except Exception as e:
602
+ print(f"Subtitle error: {e}")
603
+ continue
604
+ all_clips = [final_video_only.set_opacity(0.65)]
605
+ if all_subtitle_clips:
606
+ all_clips.extend(all_subtitle_clips)
607
  if title_text and title_text.strip():
608
  title_clips = create_title_overlay(title_text, final_video_only.size, duration=4)
609
  all_clips.extend(title_clips)
 
610
  final_video = CompositeVideoClip(all_clips)
611
+ current_video_clip = final_video
612
  if final_audio:
613
  final_video = final_video.set_audio(final_audio)
 
614
  progress(0.9, desc="Exporting...")
615
+ if generation_cancelled:
616
+ try:
617
+ final_video.close()
618
+ except:
619
+ pass
620
+ cleanup_resources()
621
+ return None, "Generation cancelled"
622
  output_filename = f'video_{timestamp}.mp4'
623
  final_output_path = os.path.join(output_path, output_filename)
 
624
  try:
625
  final_video.write_videofile(
626
  final_output_path,
 
631
  bitrate=bitrate,
632
  audio_bitrate="128k",
633
  threads=8,
634
+ ffmpeg_params=["-crf", crf, "-pix_fmt", "yuv420p", "-movflags", "+faststart", "-tune", "fastdecode"]
635
  )
636
  except Exception as e:
637
+ if generation_cancelled:
638
+ return None, "Generation cancelled"
639
  return None, f"Export error: {str(e)}"
 
640
  progress(1.0, desc="Done")
641
+ if generation_cancelled:
642
+ try:
643
+ if os.path.exists(final_output_path):
644
+ os.remove(final_output_path)
645
+ except:
646
+ pass
647
+ cleanup_resources()
648
+ return None, "Generation cancelled"
649
  try:
650
  final_video.close()
651
  if voice_over_audio:
652
  voice_over_audio.close()
653
+ current_video_clip = None
654
  except:
655
  pass
656
+ audio_source = ""
657
+ if text_input and text_input.strip():
658
+ audio_source = f"TTS ({AVAILABLE_VOICES[voice_selection]['name'] if voice_selection in AVAILABLE_VOICES else 'Puck'})"
659
+ elif voice_over_path:
660
+ audio_source = "Uploaded Audio"
661
+ else:
662
+ audio_source = "Background Music"
663
+ summary = f"Complete\n{output_filename}\n{audio_source}\n{transition_type}\n{target_duration_seconds:.1f}s\n{len(linelevel_subtitles) if linelevel_subtitles else 0} subtitles"
664
  return final_output_path, summary
665
 
666
+ # CHANGED: Removed share=True and debug=True for production
667
  with gr.Blocks(title="Video Generator", theme=gr.themes.Soft()) as interface:
668
  gr.Markdown("# 🎬 AI Video Generator")
669
+ gr.Markdown("Upload video clips to `video_clips` folder and optionally background music to `background_music` folder.")
670
 
671
  with gr.Row():
672
  with gr.Column():
673
+ text_input = gr.Textbox(label="Text for TTS", lines=4, placeholder="Enter text to convert to speech...")
674
  voice_dropdown = gr.Dropdown(
675
  choices=[(f"{v['name']} - {v['description']}", k) for k, v in AVAILABLE_VOICES.items()],
676
  value="Puck",
677
+ label="Voice Selection"
678
  )
679
+ audio_input = gr.Audio(type="filepath", label="Or Upload Audio File")
680
+ title_input = gr.Textbox(label="Video Title (Optional)", lines=2, placeholder="Enter video title...")
681
+ duration_slider = gr.Slider(0.5, 10, 2, 0.5, label="Duration (minutes) - only used if no audio")
682
+ quality_radio = gr.Radio(["High", "Standard", "Preview"], value="High", label="Video Quality")
683
  transition_radio = gr.Radio(
684
  ["Smooth Blend", "Ken Burns Zoom", "Whip Pan", "Dreamy Fade", "Snap Cut"],
685
  value="Smooth Blend",
686
+ label="Transition Effect"
687
  )
688
  with gr.Row():
689
+ submit_btn = gr.Button("🎥 Generate Video", variant="primary", size="lg")
690
+ stop_btn = gr.Button("⏹️ Stop", variant="stop", size="lg")
691
 
692
  with gr.Column():
693
+ video_output = gr.Video(label="Generated Video")
694
  summary_output = gr.Textbox(label="Status", lines=8)
695
 
696
  submit_btn.click(
 
700
  )
701
  stop_btn.click(fn=cancel_generation, outputs=[summary_output, video_output])
702
 
703
+ # CHANGED: Updated launch settings for Hugging Face
704
if __name__ == "__main__":
    # Entry point when run as a script (e.g. on a Hugging Face Space).
    # Bind to all interfaces on the Space's standard port and surface
    # server-side errors in the UI instead of failing silently.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    interface.launch(**launch_options)