sampleacc-3003 committed on
Commit
a6f03b3
Β·
verified Β·
1 Parent(s): 3d7c01a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +573 -455
app.py CHANGED
@@ -7,615 +7,733 @@ import requests
7
  import re
8
  import textwrap
9
  import shutil
10
- import time
11
  from datetime import datetime
12
  from PIL import Image, ImageDraw, ImageFont
13
- from functools import lru_cache
14
 
15
  # ========================================
16
  # CONFIGURATION SECTION - CUSTOMIZE HERE
17
  # ========================================
18
 
 
19
# Reddit card text rendering settings.
REDDIT_CONFIG = {
    'template_file': 'reddit_template.png',  # card background image in the script directory
    'font_file': 'RFDewi-Semibold.ttf',      # font used for the hook text
    'font_size_max': 120,                    # largest size tried when fitting text
    'font_size_min': 16,                     # smallest size before giving up
    'text_wrap_width': 50,                   # characters per wrapped line
    'text_color': 'black',
    'line_spacing': 10,                      # pixels between wrapped lines
    'text_box_width_percent': 0.8,           # usable fraction of template width
    'text_box_height_percent': 0.5,          # usable fraction of template height
    'y_offset': 20,                          # vertical nudge from center, in pixels
}

# Word-by-word subtitle settings (values feed the ASS style line).
SUBTITLE_CONFIG = {
    'font_file': 'komiko_axis.ttf',          # font file copied for fontconfig
    'font_name': 'Komika Axis',              # font name libass resolves via fontconfig
    'font_size_default': 18,
    'position_alignment': 5,                 # ASS alignment; 5 = centered (numpad layout)
    'margin_left': 10,
    'margin_right': 10,
    'margin_vertical': 0,
}

# Video assembly / encoding settings.
VIDEO_CONFIG = {
    'reddit_scale_percent': 0.65,            # card width as a fraction of video width
    'fade_start_percent': 0.6,               # fade-out begins at 60% of the audio
    'fade_end_percent': 0.75,                # fully faded to solid colour by 75%
    'promo_percent': 0.1,                    # final 10% reserved for the book cover
    'fade_color_rgb': (218, 207, 195),       # fade colour as RGB tuple
    'fade_color_hex': '#DACFC3',             # same colour for FFmpeg filter strings
    'book_fade_in_duration': 2,              # book cover fade-in, seconds
    # Performance settings
    'encoding_preset': 'faster',  # Options: ultrafast, superfast, veryfast, faster, fast, medium
    'threads': 0,  # 0 = auto-detect
}

# ========================================
# END CONFIGURATION
# ========================================
58
 
 
59
# Put the bundled static ffmpeg/ffprobe binaries on PATH for subprocess calls.
static_ffmpeg.add_paths()
60
 
61
- # Utility Functions
62
def load_font(font_paths, font_size, fallback='Verdana'):
    """Load a TrueType font with fallbacks.

    Tries each path in *font_paths* in order, then the *fallback* system
    font name, and finally PIL's built-in bitmap default (which ignores
    the requested size).

    Args:
        font_paths: iterable of candidate font file paths.
        font_size: point size for the loaded font.
        fallback: system font name to try when no candidate loads.

    Returns:
        A PIL ImageFont instance; never raises.
    """
    for path in font_paths:
        if os.path.exists(path):
            try:
                return ImageFont.truetype(path, font_size)
            except OSError:
                # Corrupt/unreadable font file - try the next candidate
                # (was a bare `except:`, which also hid real bugs).
                continue
    try:
        return ImageFont.truetype(fallback, font_size)
    except OSError:
        return ImageFont.load_default()
74
-
75
def time_to_seconds(time_str):
    """Convert an SRT timestamp ('HH:MM:SS,mmm') to seconds as a float."""
    hours, minutes, rest = time_str.split(':')
    seconds, millis = rest.split(',')
    whole = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
    return whole + int(millis) / 1000.0
80
-
81
def format_elapsed_time(seconds):
    """Format an elapsed duration in seconds as 'M:SS'."""
    mins, secs = divmod(int(seconds), 60)
    return f"{mins}:{secs:02d}"
86
-
87
def run_ffmpeg_cmd(cmd, env, description="", start_time=None):
    """Run an FFmpeg command and time it.

    Returns a (ok, error_message, status_line) triple: on success the
    error is None and the status line carries per-step and total timing;
    on failure the status line is None.
    """
    step_start = time.time()
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True, env=env)
    except subprocess.CalledProcessError as exc:
        # Keep only the tail of stderr - FFmpeg logs can be enormous.
        detail = exc.stderr[-1000:] if exc.stderr else str(exc)
        return False, f"{description} failed: {detail}", None

    elapsed = time.time() - step_start
    total = time.time() - start_time if start_time else elapsed
    return True, None, f"✅ {description} ({elapsed:.1f}s) | Total: {format_elapsed_time(total)}"
98
-
99
- # Font Setup
100
def setup_custom_fonts_hf(temp_dir):
    """Stage bundled fonts and build a fontconfig environment for FFmpeg/libass.

    Copies fonts from the repo's fonts/ subdirectory (plus the two fonts
    named in REDDIT_CONFIG/SUBTITLE_CONFIG found in the script root) into
    *temp_dir*, writes a minimal fonts.conf pointing at them, and returns
    a copy of os.environ with FONTCONFIG_FILE/FONTCONFIG_PATH set.

    Best-effort: on any failure (or when no fonts are found) it returns a
    plain copy of os.environ so rendering proceeds with system fonts.
    """
    try:
        fonts_dir = os.path.join(temp_dir, 'fonts')
        os.makedirs(fonts_dir, exist_ok=True)

        script_dir = os.path.dirname(os.path.abspath(__file__))
        fonts_to_copy = []

        # Check fonts/ subdirectory
        repo_fonts_dir = os.path.join(script_dir, 'fonts')
        if os.path.exists(repo_fonts_dir):
            fonts_to_copy.extend([
                os.path.join(repo_fonts_dir, f)
                for f in os.listdir(repo_fonts_dir)
                if f.lower().endswith(('.ttf', '.otf'))
            ])

        # Check root directory for the two explicitly configured fonts.
        for font_file in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
            font_path = os.path.join(script_dir, font_file)
            if os.path.exists(font_path) and font_path not in fonts_to_copy:
                fonts_to_copy.append(font_path)

        # Copy fonts into the temp staging directory fontconfig will scan.
        for src in fonts_to_copy:
            shutil.copy(src, os.path.join(fonts_dir, os.path.basename(src)))

        if fonts_to_copy:
            with open(os.path.join(temp_dir, 'fonts.conf'), 'w') as f:
                f.write(f"""<?xml version="1.0"?>
<fontconfig>
<dir>{fonts_dir}</dir>
<cachedir>{temp_dir}/cache</cachedir>
</fontconfig>""")

            env = os.environ.copy()
            env['FONTCONFIG_FILE'] = os.path.join(temp_dir, 'fonts.conf')
            env['FONTCONFIG_PATH'] = temp_dir
            return env

        return os.environ.copy()
    except Exception:
        # Font setup must never block rendering (was a bare `except:`;
        # narrowed so KeyboardInterrupt/SystemExit still propagate).
        return os.environ.copy()
144
 
145
- # File Handling
146
def download_file_from_url(url, output_dir, filename):
    """Stream a remote file to output_dir/filename and return the saved path.

    Raises requests.HTTPError (via raise_for_status) on a bad response.
    """
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()

    destination = os.path.join(output_dir, filename)
    with open(destination, 'wb') as out:
        # 8 KiB chunks keep memory flat for large media files.
        for chunk in response.iter_content(chunk_size=8192):
            out.write(chunk)
    return destination
 
 
 
 
156
 
157
def download_book_cover(book_id, output_dir):
    """Download a book's front cover from Google Books and return its path.

    Verifies the downloaded bytes decode as an image (Image.verify raises
    if not) so a bogus book_id fails fast instead of breaking FFmpeg later.
    """
    cover_url = (
        "https://books.google.com/books/publisher/content/images/frontcover/"
        f"{book_id}?fife=w720-h1280&source=gbs_api"
    )
    response = requests.get(cover_url, timeout=30)
    response.raise_for_status()

    cover_path = os.path.join(output_dir, 'book_cover.png')
    with open(cover_path, 'wb') as out:
        out.write(response.content)

    Image.open(cover_path).verify()
    return cover_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Resolve one media input that may come from an upload OR a URL.

    Exactly one of the two sources must be provided.

    Args:
        uploaded_file: Gradio upload (file object with .name, or a path), or None.
        url_string: URL text field value (may be empty/whitespace).
        file_type: 'video', 'audio', or 'subtitle' - used in messages and
            to pick a default extension.
        temp_dir: directory to download into when a URL is used.

    Returns:
        (path, error_message) - exactly one of the two is None.
    """
    has_upload = uploaded_file is not None
    has_url = url_string and url_string.strip()

    if not has_upload and not has_url:
        return None, f"❌ Provide {file_type} via upload or URL"
    if has_upload and has_url:
        return None, f"❌ Use only ONE method for {file_type}"

    if has_upload:
        # Gradio may hand us a file object (with .name) or a plain path string.
        return (uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file), None

    try:
        url = url_string.strip()
        filename = url.split('/')[-1] or f"{file_type}_file"

        if '.' not in filename:
            ext_map = {'video': '.mp4', 'audio': '.wav', 'subtitle': '.srt'}
            filename += ext_map.get(file_type, '.tmp')

        # Timestamp prefix avoids collisions between repeated downloads.
        # BUG FIX: the derived filename was dropped from the format string,
        # losing the real extension FFmpeg relies on - re-append it.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return download_file_from_url(url, temp_dir, f"{file_type}_{timestamp}_{filename}"), None
    except Exception as e:
        return None, f"❌ Download error: {str(e)}"
195
 
196
- # Media Info (Cached)
197
@lru_cache(maxsize=32)
def get_video_info(video_path):
    """Return (width, height, fps) for a video file via ffprobe (cached).

    fps is parsed from ffprobe's rational r_frame_rate ('num/den') or a
    plain float string.  Raises subprocess.CalledProcessError on ffprobe
    failure, ValueError on unparseable output.
    """
    result = subprocess.run([
        "ffprobe", "-v", "error", "-select_streams", "v:0",
        "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path
    ], capture_output=True, text=True, check=True)
    width, height = map(int, result.stdout.strip().split('x'))

    result = subprocess.run([
        "ffprobe", "-v", "error", "-select_streams", "v:0",
        "-show_entries", "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1", video_path
    ], capture_output=True, text=True, check=True)
    fps_str = result.stdout.strip()

    # SECURITY/IDIOM FIX: parse 'num/den' explicitly instead of eval() -
    # evaluating external tool output as Python code is unsafe and unneeded.
    if '/' in fps_str:
        num, den = fps_str.split('/')
        fps = float(num) / float(den)
    else:
        fps = float(fps_str)
    return width, height, fps
214
 
215
@lru_cache(maxsize=32)
def get_audio_duration(audio_path):
    """Return the duration of an audio file in seconds (cached ffprobe call)."""
    probe = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", audio_path],
        capture_output=True, text=True, check=True,
    )
    return float(probe.stdout.strip())
223
-
224
- # Subtitle Processing
225
def extract_first_subtitle(srt_path):
    """Return (text, start_sec, end_sec) of the first entry in an SRT file.

    Falls back to ("No subtitle", 0.0, 3.0) when the file contains no
    usable three-line block (index / timing / text).
    """
    with open(srt_path, 'r', encoding='utf-8') as f:
        blocks = re.split(r'\n\s*\n', f.read().strip())

    if blocks:
        first = blocks[0].strip().split('\n')
        if len(first) >= 3:
            times = first[1].split(' --> ')
            text = ' '.join(first[2:]).strip()
            return text, time_to_seconds(times[0].strip()), time_to_seconds(times[1].strip())

    return "No subtitle", 0.0, 3.0
239
 
240
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp ('HH:MM:SS,mmm') to integer milliseconds."""
    clock, millis = time_str.strip().split(',')
    hours, minutes, seconds = clock.split(':')
    return ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + int(millis)
 
 
245
 
246
def ms_to_ass_time(ms):
    """Convert integer milliseconds to an ASS timestamp 'H:MM:SS.CC'.

    ASS uses centisecond precision, so the final millisecond digit is
    truncated.
    """
    hours, rem = divmod(ms, 3600000)
    minutes, rem = divmod(rem, 60000)
    seconds, rem = divmod(rem, 1000)
    centis = rem // 10
    return f"{hours}:{minutes:02d}:{seconds:02d}.{centis:02d}"
255
-
256
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """Render *hook_text* onto the Reddit card template and save it as PNG.

    Searches font sizes from font_size_max down to font_size_min (step 2)
    for the largest size whose wrapped text fits the configured text box,
    then draws the text centered (with a vertical offset).

    Returns:
        Path of the rendered 'reddit_card.png' in *output_dir*.
    """
    template = Image.open(template_path).convert('RGBA')
    tw, th = template.size

    text_box_w = int(tw * config['text_box_width_percent'])
    text_box_h = int(th * config['text_box_height_percent'])

    script_dir = os.path.dirname(os.path.abspath(__file__))
    font_paths = [
        os.path.join(script_dir, 'fonts', config['font_file']),
        os.path.join(script_dir, config['font_file'])
    ]

    # Wrapping depends only on the configured width, not the font size -
    # hoisted out of the size-search loop (was recomputed every iteration).
    wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])
    draw = ImageDraw.Draw(template)

    # ROBUSTNESS FIX: if no size fits, fall back to the *wrapped* text at
    # the minimum size (previously the unwrapped text was drawn at the
    # maximum size, overflowing the card for long hooks).
    best_size = config['font_size_min']
    best_wrapped = wrapped

    for size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
        font = load_font(font_paths, size)
        bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
        if bbox[2] <= text_box_w and bbox[3] <= text_box_h:
            best_size = size
            break

    # Draw the text centered on the card.
    font = load_font(font_paths, best_size)
    bbox = draw.multiline_textbbox((0, 0), best_wrapped, font=font, spacing=config['line_spacing'])

    x = (tw - bbox[2]) / 2
    y = (th - bbox[3]) / 2 + config['y_offset']

    draw.multiline_text((x, y), best_wrapped, fill=config['text_color'],
                        font=font, spacing=config['line_spacing'], align='left')

    output_path = os.path.join(output_dir, 'reddit_card.png')
    template.save(output_path, 'PNG')
    return output_path
300
 
301
def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
                                      font_size=None, skip_first=False, config=SUBTITLE_CONFIG):
    """Convert an SRT file to an ASS file with per-word highlighting.

    Each subtitle line is emitted once per word; the active word is drawn
    with a coloured border box, and timing is split evenly across words.
    Set skip_first=True to drop the first SRT block (shown on the Reddit
    card instead).

    Returns:
        Path of the generated 'word_highlight.ass' in *output_dir*.
    """
    font_size = font_size or config['font_size_default']

    # (highlight box colour, highlighted text colour) in ASS &HAABBGGRR form.
    color_map = {
        'yellow': ('&H0000FFFF', '&H00000000'), 'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
        'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'),
        'blue': ('&H00FF0000', '&H00FFFFFF'),
    }
    highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H0000FFFF', '&H00000000'))

    with open(srt_path, 'r', encoding='utf-8') as f:
        srt_content = f.read()

    ass_header = f"""[Script Info]
Title: Word Highlight
ScriptType: v4.00+

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    ass_events = []
    srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
    start_idx = 1 if skip_first else 0

    for block in srt_blocks[start_idx:]:
        lines = block.strip().split('\n')
        if len(lines) < 3:
            continue

        times = lines[1].split(' --> ')
        if len(times) != 2:
            continue

        start_ms = srt_time_to_ms(times[0])
        end_ms = srt_time_to_ms(times[1])
        words = ' '.join(lines[2:]).split()

        if not words:
            continue

        # Split the block's duration evenly across its words.
        time_per_word = (end_ms - start_ms) / len(words)

        for i, word in enumerate(words):
            word_start = start_ms + int(i * time_per_word)
            # Last word runs to the block's true end to avoid rounding gaps.
            word_end = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)

            # Style only the active word; {\r} resets to the Default style.
            styled_words = [
                f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}" if j == i else w
                for j, w in enumerate(words)
            ]

            ass_events.append(
                f"Dialogue: 0,{ms_to_ass_time(word_start)},{ms_to_ass_time(word_end)},Default,,0,0,0,,{' '.join(styled_words)}"
            )

    ass_path = os.path.join(output_dir, 'word_highlight.ass')
    # BUG FIX: write UTF-8 explicitly - the SRT is read as UTF-8, and the
    # platform-default encoding would raise UnicodeEncodeError for
    # non-ASCII subtitles (e.g. on Windows/cp1252 locales).
    with open(ass_path, 'w', encoding='utf-8') as f:
        f.write(ass_header + '\n'.join(ass_events))

    return ass_path
367
 
368
- # Main Processing
369
def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, subtitle_url,
                 book_id, enable_highlight, highlight_color, font_size, crf_quality=23):
    """Assemble the final video: looped clip + subtitles (+ Reddit card / book promo).

    Returns (output_path, status_message); on failure output_path is None
    and the message describes the error.  All intermediate files and the
    final output live in a fresh temp directory.
    """
    start_time = time.time()  # wall clock for per-step and total timing
    temp_dir = tempfile.mkdtemp()

    try:
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)

        # Resolve each input (upload XOR URL); bail out on the first error.
        video_path, err = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if err: return None, err

        audio_path, err = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if err: return None, err

        subtitle_path, err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if err: return None, err

        setup_time = time.time() - start_time

        video_width, video_height, video_fps = get_video_info(video_path)
        audio_duration = get_audio_duration(audio_path)

        status = f"⏱️ Setup: {setup_time:.1f}s\n"
        status += f"📥 {video_width}x{video_height}@{video_fps:.0f}fps | {audio_duration:.1f}s\n\n"

        # Reddit card overlay is enabled only when the template image ships
        # alongside the script.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit = os.path.exists(reddit_template_path)

        if has_reddit:
            reddit_start = time.time()
            # first_start/first_end gate the overlay's visibility window.
            first_text, first_start, first_end = extract_first_subtitle(subtitle_path)
            reddit_card_path = create_reddit_card_with_text(reddit_template_path, first_text, temp_dir)
            reddit_time = time.time() - reddit_start
            status += f"📱 Reddit card: ✅ ({reddit_time:.1f}s)\n"

        # Word-highlight ASS subtitles; the first block is skipped when it
        # is already shown on the Reddit card.
        sub_start = time.time()
        subtitle_ass = create_word_by_word_highlight_ass(
            subtitle_path, temp_dir, highlight_color, font_size,
            skip_first=has_reddit, config=SUBTITLE_CONFIG
        ) if enable_highlight else subtitle_path
        sub_time = time.time() - sub_start
        status += f"📝 Subtitles: ✅ ({sub_time:.1f}s)\n\n"

        # Escape the path for use inside an FFmpeg filter expression.
        subtitle_escaped = subtitle_ass.replace('\\', '/').replace(':', '\\:')

        timestamp = datetime.now().strftime("%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
        has_book = book_id and book_id.strip()

        # Timeline: the main video fades to a solid colour, which holds
        # until the book-cover promo fills the final promo_percent.
        fade_start = audio_duration * VIDEO_CONFIG['fade_start_percent']
        fade_end = audio_duration * VIDEO_CONFIG['fade_end_percent']
        fade_duration = fade_end - fade_start
        promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
        book_start = audio_duration - promo_duration
        solid_duration = book_start - fade_end

        # Shared x264 settings for every encode step.
        common_encode_flags = [
            "-c:v", "libx264",
            "-preset", VIDEO_CONFIG['encoding_preset'],
            "-crf", str(crf_quality),
            "-pix_fmt", "yuv420p",
            "-threads", str(VIDEO_CONFIG['threads'])
        ]

        if has_book:
            status += "🎬 Encoding with book cover:\n\n"
            book_cover_path = download_book_cover(book_id.strip(), temp_dir)

            segments = []

            # STEP 1: looped main video, fading out to the solid colour.
            main_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
            success, error, timing = run_ffmpeg_cmd([
                "ffmpeg", "-hwaccel", "auto",
                "-stream_loop", "-1", "-i", video_path, "-t", str(fade_end),
                "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_start}:d={fade_duration}:c={VIDEO_CONFIG['fade_color_hex']}",
                *common_encode_flags, "-an", "-y", main_path
            ], ffmpeg_env, "Step 1/4: Main video", start_time)
            if not success: return None, error
            status += f"{timing}\n"
            segments.append(main_path)

            # STEP 2: solid colour hold between fade and promo (if any).
            if solid_duration > 0:
                solid_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
                success, error, timing = run_ffmpeg_cmd([
                    "ffmpeg", "-f", "lavfi",
                    "-i", f"color=c={VIDEO_CONFIG['fade_color_hex']}:s={video_width}x{video_height}:d={solid_duration}:r={video_fps}",
                    *common_encode_flags, "-y", solid_path
                ], ffmpeg_env, "Step 2/4: Solid color", start_time)
                if not success: return None, error
                status += f"{timing}\n"
                segments.append(solid_path)

            # STEP 3: book cover, letterboxed on the fade colour, fading in.
            cover_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
            success, error, timing = run_ffmpeg_cmd([
                "ffmpeg", "-hwaccel", "auto",
                "-loop", "1", "-i", book_cover_path, "-t", str(promo_duration),
                "-vf", f"scale={video_width}:{video_height}:force_original_aspect_ratio=decrease,pad={video_width}:{video_height}:(ow-iw)/2:(oh-ih)/2:color={VIDEO_CONFIG['fade_color_hex']},setsar=1,fps={video_fps},fade=t=in:st=0:d={VIDEO_CONFIG['book_fade_in_duration']}:c={VIDEO_CONFIG['fade_color_hex']}",
                *common_encode_flags, "-an", "-y", cover_path
            ], ffmpeg_env, "Step 3/4: Book cover", start_time)
            if not success: return None, error
            status += f"{timing}\n"
            segments.append(cover_path)

            # STEP 4: concat the segments, burn subtitles, mux the audio.
            concat_list = os.path.join(temp_dir, f"concat_{timestamp}.txt")
            with open(concat_list, 'w') as f:
                f.write('\n'.join(f"file '{s}'" for s in segments))

            if has_reddit:
                filter_complex = (
                    f"[0:v]ass={subtitle_escaped}[bg];"
                    f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                    f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_start},{first_end})'[v]"
                )
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-f", "concat", "-safe", "0", "-i", concat_list,
                    "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                    "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]
            else:
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-f", "concat", "-safe", "0", "-i", concat_list, "-i", audio_path,
                    "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]

            success, error, timing = run_ffmpeg_cmd(cmd, ffmpeg_env, "Step 4/4: Final", start_time)
            if not success: return None, error
            status += f"{timing}\n"

        else:
            # No book promo: single-pass encode of the looped clip + audio.
            status += "🎬 Encoding:\n\n"

            if has_reddit:
                filter_complex = (
                    f"[0:v]ass={subtitle_escaped}[bg];"
                    f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                    f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_start},{first_end})'[v]"
                )
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-stream_loop", "-1", "-i", video_path,
                    "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                    "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]
            else:
                cmd = [
                    "ffmpeg", "-hwaccel", "auto",
                    "-stream_loop", "-1", "-i", video_path, "-i", audio_path,
                    "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                    *common_encode_flags, "-c:a", "aac", "-shortest", "-y", output_path
                ]

            success, error, timing = run_ffmpeg_cmd(cmd, ffmpeg_env, "Video encoding", start_time)
            if not success: return None, error
            status += f"{timing}\n"

        total_time = time.time() - start_time

        if os.path.exists(output_path):
            size_mb = os.path.getsize(output_path) / (1024 * 1024)
            success_msg = f"""✅ VIDEO COMPLETE!

📊 File: {size_mb:.1f}MB | Duration: {audio_duration:.1f}s
⏱️ TOTAL TIME: {format_elapsed_time(total_time)} ({total_time:.1f}s)
⚡ Preset: {VIDEO_CONFIG['encoding_preset']} | Threads: {VIDEO_CONFIG['threads']}

──────────────────────────
{status}"""
            return output_path, success_msg

        return None, "❌ Output not created"

    except Exception as e:
        total_time = time.time() - start_time
        return None, f"❌ Error after {format_elapsed_time(total_time)}: {str(e)}"
564
 
565
  # Gradio UI
566
  with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
567
- gr.Markdown(f"""
568
- # 🎬 Video Stitcher ⚑ OPTIMIZED
569
-
570
- **Performance:** Hardware accel + {VIDEO_CONFIG['encoding_preset']} preset + multi-threading
571
- **Config:** Reddit={REDDIT_CONFIG['font_file']} | Subtitle={SUBTITLE_CONFIG['font_name']}
572
-
573
- **Expected:** 3-4 minutes (was 6 minutes) - 30-50% faster! πŸš€
574
- """)
 
 
 
 
575
 
576
  with gr.Row():
577
  with gr.Column():
 
578
  with gr.Group():
579
- gr.Markdown("**πŸ“Ή Video**")
580
  video_input = gr.File(label="Upload", file_types=[".mp4", ".mov", ".avi", ".mkv"], type="filepath")
581
- video_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
 
582
 
 
583
  with gr.Group():
584
- gr.Markdown("**🎡 Audio**")
585
- audio_input = gr.File(label="Upload", file_types=[".wav", ".mp3", ".aac"], type="filepath")
586
- audio_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
587
 
 
588
  with gr.Group():
589
- gr.Markdown("**πŸ“ Subtitle**")
590
  subtitle_input = gr.File(label="Upload (.srt)", file_types=[".srt"], type="filepath")
591
- subtitle_url_input = gr.Textbox(label="OR URL", placeholder="https://...")
 
592
 
593
- book_id_input = gr.Textbox(label="πŸ“š Book ID (Optional)", placeholder="wyaEDwAAQBAJ")
 
594
 
595
- with gr.Row():
596
- enable_highlight = gr.Checkbox(label="Highlight", value=True)
597
- highlight_color = gr.Dropdown(choices=['yellow', 'orange', 'green', 'cyan', 'pink', 'red', 'blue'],
598
- value='yellow', label="Color")
599
- with gr.Row():
600
- font_size = gr.Slider(12, 32, 18, step=2, label="Font Size")
601
- crf_input = gr.Slider(18, 28, 23, step=1, label="Quality")
602
 
603
  stitch_btn = gr.Button("🎬 Stitch Video", variant="primary", size="lg")
604
 
605
  with gr.Column():
606
- status_output = gr.Textbox(label="Status", lines=12)
 
607
  video_output = gr.Video(label="Result")
608
 
609
- gr.Markdown("""
610
- ### ⚑ Optimizations Applied:
611
- - βœ… Hardware acceleration (`-hwaccel auto`)
612
- - βœ… Faster encoding preset
613
- - βœ… Multi-threading (auto CPU cores)
614
- - βœ… Cached media info
615
- - βœ… **Real-time execution tracking**
616
-
617
- **Timeline shown for each step + total time!**
618
- """)
 
 
 
619
 
620
  stitch_btn.click(
621
  fn=stitch_media,
@@ -626,4 +744,4 @@ with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
626
  )
627
 
628
if __name__ == "__main__":
    # show_error surfaces tracebacks in the Gradio UI (useful on HF Spaces).
    app.launch(show_error=True)
 
7
  import re
8
  import textwrap
9
  import shutil
 
10
  from datetime import datetime
11
  from PIL import Image, ImageDraw, ImageFont
 
12
 
13
  # ========================================
14
  # CONFIGURATION SECTION - CUSTOMIZE HERE
15
  # ========================================
16
 
17
+ # Reddit Template Text Settings
18
  REDDIT_CONFIG = {
19
+ 'template_file': 'reddit_template.png', # Template filename in script directory
20
+ 'font_file': 'RFDewi-Bold.ttf', # Font file for Reddit text
21
+ 'font_size_max': 180, # Maximum font size to try
22
+ 'font_size_min': 16, # Minimum font size (if text too long)
23
+ 'text_wrap_width': 35, # Characters per line for wrapping
24
+ 'text_color': 'black', # Text color
25
+ 'line_spacing': 10, # Spacing between lines
26
+ 'text_box_width_percent': 0.85, # 80% of template width
27
+ 'text_box_height_percent': 0.65, # 50% of template height
28
+ 'y_offset': 20, # Vertical offset from center
29
  }
30
 
31
+ # Word-by-Word Subtitle Settings
32
  SUBTITLE_CONFIG = {
33
+ 'font_file': 'komiko_axis.ttf', # Font file for subtitles (TTF or OTF)
34
+ 'font_name': 'Komika Axis', # Font name as it appears in system
35
+ 'font_size_default': 12, # Default subtitle font size
36
+ 'position_alignment': 5, # 5 = center (1-9 numpad layout)
37
+ 'margin_left': 20,
38
+ 'margin_right': 20,
39
  'margin_vertical': 0,
40
  }
41
 
42
+ # Video Processing Settings
43
  VIDEO_CONFIG = {
44
+ 'reddit_scale_percent': 0.75, # Reddit template size (0.75 = 75% of video width)
45
+ 'fade_start_percent': 0.70, # When fade to color starts (60%)
46
+ 'fade_end_percent': 0.83, # When fully faded to color (75%)
47
+ 'promo_percent': 0.1, # Last 10% for book cover
48
+ 'fade_color_rgb': (218, 207, 195), # Fade color RGB
49
+ 'book_fade_in_duration': 2, # Book cover fade-in duration (seconds)
 
 
 
 
50
  }
51
 
52
  # ========================================
53
+ # END CONFIGURATION SECTION
54
  # ========================================
55
 
56
+ # Add static ffmpeg to PATH
57
  static_ffmpeg.add_paths()
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def setup_custom_fonts_hf(temp_dir):
60
+ """
61
+ Setup custom fonts for FFmpeg/libass - Hugging Face Spaces compatible.
62
+
63
+ File Structure Required:
64
+ project/
65
+ β”œβ”€β”€ app.py
66
+ β”œβ”€β”€ fonts/
67
+ β”‚ β”œβ”€β”€ komiko_axis.ttf (or your fonts)
68
+ β”‚ └── (other fonts...)
69
+ └── reddit_template.png
70
+
71
+ Returns: environment dict with FONTCONFIG configured
72
+ """
73
  try:
74
  fonts_dir = os.path.join(temp_dir, 'fonts')
75
  os.makedirs(fonts_dir, exist_ok=True)
76
 
77
+ # Get script directory and check for fonts/ subdirectory
78
  script_dir = os.path.dirname(os.path.abspath(__file__))
79
+ repo_fonts_dir = os.path.join(script_dir, 'fonts')
80
+
81
+ # Also check for fonts in script root (fallback)
82
  fonts_to_copy = []
83
 
84
+ # Check fonts/ subdirectory first
 
85
  if os.path.exists(repo_fonts_dir):
86
+ for font_file in os.listdir(repo_fonts_dir):
87
+ if font_file.endswith(('.ttf', '.otf', '.TTF', '.OTF')):
88
+ fonts_to_copy.append(os.path.join(repo_fonts_dir, font_file))
89
+
90
+ # Check script root directory for fonts
91
+ for item in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
92
+ font_path = os.path.join(script_dir, item)
 
 
93
  if os.path.exists(font_path) and font_path not in fonts_to_copy:
94
  fonts_to_copy.append(font_path)
95
 
96
+ # Copy all found fonts
97
  for src in fonts_to_copy:
98
+ dst = os.path.join(fonts_dir, os.path.basename(src))
99
+ shutil.copy(src, dst)
100
 
101
  if fonts_to_copy:
102
+ # Create fonts.conf for fontconfig
103
+ fonts_conf = f"""<?xml version="1.0"?>
104
  <fontconfig>
105
  <dir>{fonts_dir}</dir>
106
  <cachedir>{temp_dir}/cache</cachedir>
107
+ </fontconfig>
108
+ """
109
+ conf_path = os.path.join(temp_dir, 'fonts.conf')
110
+ with open(conf_path, 'w') as f:
111
+ f.write(fonts_conf)
112
 
113
+ # Set environment variables
114
  env = os.environ.copy()
115
+ env['FONTCONFIG_FILE'] = conf_path
116
  env['FONTCONFIG_PATH'] = temp_dir
117
  return env
118
 
119
+ # Fallback to normal environment
120
  return os.environ.copy()
121
+
122
+ except Exception as e:
123
  return os.environ.copy()
124
 
 
125
  def download_file_from_url(url, output_dir, filename):
126
+ """Download a file from URL and save it to output directory."""
127
+ try:
128
+ response = requests.get(url, stream=True, timeout=30)
129
+ response.raise_for_status()
130
+
131
+ file_path = os.path.join(output_dir, filename)
132
+ with open(file_path, 'wb') as f:
133
+ for chunk in response.iter_content(chunk_size=8192):
134
+ f.write(chunk)
135
+
136
+ return file_path
137
+ except Exception as e:
138
+ raise Exception(f"Failed to download file from URL: {str(e)}")
139
 
140
  def download_book_cover(book_id, output_dir):
141
+ """Download book cover from Google Books API using Book ID."""
142
+ try:
143
+ image_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}?fife=w400-h600&source=gbs_api"
144
+
145
+ response = requests.get(image_url, timeout=30)
146
+ response.raise_for_status()
147
+
148
+ image_path = os.path.join(output_dir, 'book_cover.png')
149
+ with open(image_path, 'wb') as f:
150
+ f.write(response.content)
151
+
152
+ img = Image.open(image_path)
153
+ img.verify()
154
+
155
+ return image_path
156
+ except Exception as e:
157
+ raise Exception(f"Failed to download book cover: {str(e)}")
158
+
159
+ def get_video_info(video_path):
160
+ """Get video resolution and frame rate using ffprobe."""
161
+ try:
162
+ cmd_res = [
163
+ "ffprobe", "-v", "error", "-select_streams", "v:0",
164
+ "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path
165
+ ]
166
+ result = subprocess.run(cmd_res, capture_output=True, text=True, check=True)
167
+ width, height = result.stdout.strip().split('x')
168
+
169
+ cmd_fps = [
170
+ "ffprobe", "-v", "error", "-select_streams", "v:0",
171
+ "-show_entries", "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1", video_path
172
+ ]
173
+ result = subprocess.run(cmd_fps, capture_output=True, text=True, check=True)
174
+ fps_str = result.stdout.strip()
175
+
176
+ if '/' in fps_str:
177
+ num, den = fps_str.split('/')
178
+ fps = float(num) / float(den)
179
+ else:
180
+ fps = float(fps_str)
181
+
182
+ return int(width), int(height), fps
183
+ except Exception as e:
184
+ raise Exception(f"Failed to get video info: {str(e)}")
185
+
186
+ def get_audio_duration(audio_path):
187
+ """Get audio duration in seconds using ffprobe."""
188
+ try:
189
+ cmd = [
190
+ "ffprobe", "-v", "error", "-show_entries", "format=duration",
191
+ "-of", "default=noprint_wrappers=1:nokey=1", audio_path
192
+ ]
193
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
194
+ return float(result.stdout.strip())
195
+ except Exception as e:
196
+ raise Exception(f"Failed to get audio duration: {str(e)}")
197
+
198
def extract_first_subtitle(srt_path):
    """Return (text, start_sec, end_sec) for the first cue in an SRT file.

    Falls back to ("No subtitle found", 0.0, 3.0) when the file contains no
    usable cue; any parse failure is re-raised as a generic Exception.
    """
    try:
        with open(srt_path, 'r', encoding='utf-8') as handle:
            raw = handle.read()

        # Cues are separated by blank lines.
        cues = re.split(r'\n\s*\n', raw.strip())
        if not cues:
            return "No subtitle found", 0.0, 3.0

        lines = cues[0].strip().split('\n')
        if len(lines) < 3:
            return "No subtitle found", 0.0, 3.0

        def _to_seconds(stamp):
            # "HH:MM:SS,mmm" -> float seconds
            hours, minutes, rest = stamp.split(':')
            whole, millis = rest.split(',')
            return int(hours) * 3600 + int(minutes) * 60 + int(whole) + int(millis) / 1000.0

        stamps = lines[1].split(' --> ')
        begin = _to_seconds(stamps[0].strip())
        finish = _to_seconds(stamps[1].strip())
        caption = ' '.join(lines[2:]).strip()

        return caption, begin, finish
    except Exception as e:
        raise Exception(f"Failed to extract first subtitle: {str(e)}")
226
+
227
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """
    Render *hook_text* onto the Reddit card template and save the composite.

    The text is word-wrapped and the font size is auto-fitted: sizes are tried
    from ``font_size_max`` down to ``font_size_min`` (step 2) until the wrapped
    block fits the configured text box.

    Fix vs. previous version: when no size fit, the old code drew the raw,
    UNWRAPPED text at the maximum size; now it falls back to the minimum size
    with the wrapped text.

    Args:
        template_path: Path to the card template image.
        hook_text: Text to render (typically the first subtitle).
        output_dir: Directory for the composite PNG.
        config: Styling dict (defaults to REDDIT_CONFIG).

    Returns:
        Path of the saved 'reddit_card_composite.png'.

    Raises:
        Exception: wrapping any image/font failure.
    """
    def _load_font(paths, size):
        # Bundled font files first, then a system font, then PIL's default.
        for candidate in paths:
            if os.path.exists(candidate):
                try:
                    return ImageFont.truetype(candidate, size)
                except Exception:
                    pass
        try:
            return ImageFont.truetype('Verdana', size)
        except Exception:
            return ImageFont.load_default()

    try:
        template = Image.open(template_path).convert('RGBA')
        template_width, template_height = template.size
        draw = ImageDraw.Draw(template)

        text_box_width = int(template_width * config['text_box_width_percent'])
        text_box_height = int(template_height * config['text_box_height_percent'])

        script_dir = os.path.dirname(os.path.abspath(__file__))
        font_paths = [
            os.path.join(script_dir, 'fonts', config['font_file']),
            os.path.join(script_dir, config['font_file']),
        ]

        # Wrapping does not depend on font size, so compute it once.
        wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])

        # Fallback: smallest size with wrapped text if nothing fits.
        best_font_size = config['font_size_min']
        best_wrapped_text = wrapped

        for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
            font = _load_font(font_paths, font_size)
            bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
            if (bbox[2] - bbox[0]) <= text_box_width and (bbox[3] - bbox[1]) <= text_box_height:
                best_font_size = font_size
                break

        # Measure with the chosen size, then centre (plus configurable nudge).
        font = _load_font(font_paths, best_font_size)
        bbox = draw.multiline_textbbox((0, 0), best_wrapped_text, font=font, spacing=config['line_spacing'])
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]

        x = (template_width - text_width) / 2
        y = (template_height - text_height) / 2 + config['y_offset']

        draw.multiline_text(
            (x, y),
            best_wrapped_text,
            fill=config['text_color'],
            font=font,
            spacing=config['line_spacing'],
            align='left'
        )

        output_path = os.path.join(output_dir, 'reddit_card_composite.png')
        template.save(output_path, 'PNG')

        return output_path
    except Exception as e:
        raise Exception(f"Failed to create Reddit card: {str(e)}")
319
 
320
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Resolve a single input source (upload XOR URL) to a local file path.

    Returns (path, None) on success, or (None, error_message) when the user
    supplied neither source, both sources, or the URL download failed.
    """
    upload_given = uploaded_file is not None
    url_given = url_string and url_string.strip()

    if not upload_given and not url_given:
        return None, f"❌ Please provide {file_type} either by upload or URL"

    if upload_given and url_given:
        return None, f"❌ Please use only ONE method for {file_type}: either upload OR URL (not both)"

    if upload_given:
        # Gradio may hand us a file-like object (with .name) or a raw path.
        return getattr(uploaded_file, 'name', uploaded_file), None

    if url_given:
        try:
            # Derive a readable filename from the URL, defaulting the extension.
            segments = url_string.strip().split('/')
            original_filename = segments[-1] if segments else f"{file_type}_file"

            if '.' not in original_filename:
                default_ext = {'video': '.mp4', 'audio': '.wav', 'subtitle': '.srt'}
                original_filename += default_ext.get(file_type, '.tmp')

            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            local_name = f"{file_type}_{stamp}_{original_filename}"

            return download_file_from_url(url_string.strip(), temp_dir, local_name), None
        except Exception as e:
            return None, f"❌ Error downloading {file_type} from URL: {str(e)}"

    return None, f"❌ Unknown error processing {file_type}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp ("HH:MM:SS,mmm") to integer milliseconds."""
    clock, _, millis = time_str.strip().partition(',')
    hours, minutes, seconds = clock.split(':')
    whole_seconds = (int(hours) * 60 + int(minutes)) * 60 + int(seconds)
    return whole_seconds * 1000 + int(millis)
361
 
362
def ms_to_ass_time(ms):
    """Convert milliseconds to the ASS "H:MM:SS.cc" format (centisecond precision)."""
    hours, remainder = divmod(ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, leftover_ms = divmod(remainder, 1000)
    centiseconds = leftover_ms // 10
    return f"{hours}:{minutes:02d}:{seconds:02d}.{centiseconds:02d}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
 
372
def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
                                      font_size=None, skip_first=False, config=None):
    """
    Convert an SRT file to an ASS file with word-by-word highlighting.

    Each SRT cue is expanded into one ASS Dialogue line per word; the active
    word gets a coloured outline while the rest of the cue stays plain.

    Args:
        srt_path: Source .srt file.
        output_dir: Directory for the generated .ass file.
        highlight_color: yellow/orange/green/cyan/pink/red/blue (unknown
            values fall back to yellow).
        font_size: Subtitle font size; defaults to the configured size.
        skip_first: Skip the first cue (shown as the Reddit card instead).
        config: Styling dict. Defaults to SUBTITLE_CONFIG — resolved at call
            time rather than at definition time, so runtime edits to
            SUBTITLE_CONFIG are honoured (previously it was bound once at def).

    Returns:
        Path of the written .ass file.
    """
    if config is None:
        config = SUBTITLE_CONFIG
    if font_size is None:
        font_size = config['font_size_default']

    # (highlight outline colour, highlighted word text colour) in ASS &HAABBGGRR form.
    color_map = {
        'yellow': ('&H0000FFFF', '&H00000000'),
        'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'),
        'cyan': ('&H00FFFF00', '&H00000000'),
        'pink': ('&H00FF69B4', '&H00000000'),
        'red': ('&H000000FF', '&H00FFFFFF'),
        'blue': ('&H00FF0000', '&H00FFFFFF'),
    }
    highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H0000FFFF', '&H00000000'))

    with open(srt_path, 'r', encoding='utf-8') as f:
        srt_content = f.read()

    ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')

    ass_header = f"""[Script Info]
Title: Word-by-Word Highlight Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayDepth: 0

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
    ass_events = []
    start_index = 1 if skip_first else 0

    for block in srt_blocks[start_index:]:
        lines = block.strip().split('\n')
        if len(lines) < 3:
            continue  # need index, timing, and at least one text line

        times = lines[1].split(' --> ')
        if len(times) != 2:
            continue  # malformed timing line

        start_ms = srt_time_to_ms(times[0])
        end_ms = srt_time_to_ms(times[1])

        words = ' '.join(lines[2:]).split()
        if not words:
            continue

        # Distribute the cue's duration evenly across its words.
        time_per_word = (end_ms - start_ms) / len(words)

        for i, word in enumerate(words):
            word_start_ms = start_ms + int(i * time_per_word)
            # Snap the final word to the cue's end to avoid rounding gaps.
            word_end_ms = end_ms if i == len(words) - 1 else start_ms + int((i + 1) * time_per_word)

            text_parts = []
            for j, w in enumerate(words):
                if j == i:
                    text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}")
                else:
                    text_parts.append(w)

            styled_text = ' '.join(text_parts)
            start_time = ms_to_ass_time(word_start_ms)
            end_time = ms_to_ass_time(word_end_ms)

            ass_events.append(f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{styled_text}")

    with open(ass_path, 'w', encoding='utf-8') as f:
        f.write(ass_header)
        f.write('\n'.join(ass_events))

    return ass_path
460
 
461
def stitch_media(
    video_file, video_url,
    audio_file, audio_url,
    subtitle_file, subtitle_url,
    book_id,
    enable_highlight,
    highlight_color,
    font_size,
    crf_quality=23
):
    """Stitch video, audio and subtitles into one clip, with optional extras.

    Pipeline:
      1. Resolve each input (upload XOR URL) to a local file.
      2. Optionally render the first subtitle as a Reddit-card overlay.
      3. Optionally convert subtitles to word-by-word highlighted ASS.
      4. With a Google Books ID: build a 4-step promo (main video + fade-out,
         solid-colour hold, book-cover fade-in, final mux); otherwise just
         loop the background video for the audio length.

    Returns:
        (output_path, status_message) on success, (None, error_message) on failure.
    """
    # NOTE: not removed here - the returned output file lives inside it.
    temp_dir = tempfile.mkdtemp()

    try:
        # Register bundled fonts so ffmpeg/libass can find them.
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)

        # --- Resolve inputs (upload XOR URL) ---
        video_path, video_error = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if video_error: return None, video_error

        audio_path, audio_error = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if audio_error: return None, audio_error

        subtitle_path, subtitle_error = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if subtitle_error: return None, subtitle_error

        # --- Probe media ---
        video_width, video_height, video_fps = get_video_info(video_path)
        audio_duration = get_audio_duration(audio_path)

        status_msg = "📥 Processing files:\n"
        status_msg += f" • Video: {'URL' if video_url else 'Upload'} ({video_width}x{video_height} @ {video_fps:.2f}fps)\n"
        status_msg += f" • Audio: {'URL' if audio_url else 'Upload'} ({audio_duration:.2f}s)\n"
        status_msg += f" • Subtitle: {'URL' if subtitle_url else 'Upload'}\n"

        # --- Reddit card (first subtitle rendered onto the template) ---
        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit_template = os.path.exists(reddit_template_path)

        if has_reddit_template:
            status_msg += " • Reddit template: ✅ Found\n"
            try:
                first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
                status_msg += f"\n📱 Reddit Overlay:\n"
                status_msg += f" • Text: '{first_sub_text[:40]}...'\n"
                status_msg += f" • Timing: {first_sub_start:.1f}s - {first_sub_end:.1f}s\n"

                reddit_card_path = create_reddit_card_with_text(
                    reddit_template_path, first_sub_text, temp_dir, REDDIT_CONFIG
                )
                status_msg += " • ✅ Reddit card ready\n"
            except Exception as e:
                # Non-fatal: fall back to plain subtitles if the card fails.
                status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
                has_reddit_template = False
        else:
            status_msg += " • Reddit template: ⚠️ Not found (skipping)\n"

        # --- Subtitles: optionally word-by-word highlighted ASS ---
        if enable_highlight:
            status_msg += f"\n✨ Word highlighting: {highlight_color} ({font_size}px)\n"
            # Skip the first cue when it is shown as the Reddit card instead.
            subtitle_to_use = create_word_by_word_highlight_ass(
                subtitle_path, temp_dir, highlight_color, font_size,
                skip_first=has_reddit_template, config=SUBTITLE_CONFIG
            )
        else:
            subtitle_to_use = subtitle_path

        # Escape the path for use inside an ffmpeg filter expression.
        subtitle_escaped = subtitle_to_use.replace('\\', '/').replace(':', '\\:')

        # --- Output target ---
        has_book_cover = book_id and book_id.strip()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")

        # Derive the ffmpeg fade colour from the configured RGB triple so that
        # VIDEO_CONFIG stays the single source of truth (it was hard-coded
        # to "#dacfc3" before, silently ignoring the config).
        r, g, b = VIDEO_CONFIG['fade_color_rgb']
        fade_color_hex = f"#{r:02x}{g:02x}{b:02x}"

        if has_book_cover:
            status_msg += f"\n📚 Downloading book cover (ID: {book_id})...\n"
            try:
                book_cover_path = download_book_cover(book_id.strip(), temp_dir)
                status_msg += "✅ Book cover downloaded\n"

                # Timeline (fractions of the audio length):
                #   0 .. fade_start         main video
                #   fade_start .. fade_end  fade-out to solid colour
                #   fade_end .. book_start  solid colour hold
                #   book_start .. end       book cover (fades in)
                fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
                fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
                fade_out_duration = fade_ends_at - fade_starts_at

                promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
                book_appears_at = audio_duration - promo_duration
                solid_color_duration = book_appears_at - fade_ends_at

                main_video_duration = fade_ends_at
                cover_segment_duration = promo_duration

                status_msg += f"\n⏱️ Timing: Fade {fade_starts_at:.1f}→{fade_ends_at:.1f}s, Hold {solid_color_duration:.1f}s\n"

                # STEP 1: looped main video, faded out to the solid colour.
                status_msg += "🎬 Step 1/4: Main video with fade-out...\n"
                main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
                cmd_main = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration),
                    "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path
                ]
                subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # STEP 2: solid-colour hold segment.
                status_msg += "✅ Step 1 done\n🎬 Step 2/4: Solid color...\n"
                solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
                cmd_solid = [
                    "ffmpeg", "-f", "lavfi",
                    "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path
                ]
                subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # STEP 3: book cover, fading in from the solid colour.
                status_msg += "✅ Step 2 done\n🎬 Step 3/4: Cover with fade-in...\n"
                cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
                cmd_cover = [
                    "ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration),
                    # Plain scale (no aspect-preserving pad): cover is stretched to frame size.
                    "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps},fade=t=in:st=0:d={VIDEO_CONFIG['book_fade_in_duration']}:c={fade_color_hex}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path
                ]
                subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # STEP 4: concatenate segments, then add audio/subtitles/Reddit card.
                status_msg += "✅ Step 3 done\n🎬 Step 4/4: Final assembly...\n"
                concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
                with open(concat_list_path, 'w') as f:
                    f.write(f"file '{main_segment_path}'\n")
                    f.write(f"file '{solid_color_path}'\n")
                    f.write(f"file '{cover_segment_path}'\n")

                if has_reddit_template:
                    # Burn subtitles, then overlay the (scaled) Reddit card,
                    # centred, only during the first cue's time window.
                    filter_complex = (
                        f"[0:v]ass={subtitle_escaped}[bg];"
                        f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                        f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v]"
                    )
                    cmd_final = [
                        "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path,
                        "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                        "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                        "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                        "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                    ]
                else:
                    cmd_final = [
                        "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path, "-i", audio_path,
                        "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                        "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                        "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                    ]

                subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)

            except subprocess.CalledProcessError as e:
                # Show only the tail of ffmpeg's stderr - the useful part.
                return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
            except Exception as e:
                return None, f"❌ Error: {str(e)}"

        else:
            # --- No book cover: loop the background video for the audio length ---
            status_msg += "\n🎬 Creating video...\n"

            if has_reddit_template:
                filter_complex = (
                    f"[0:v]ass={subtitle_escaped}[bg];"
                    f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                    f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v]"
                )
                cmd = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path,
                    "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                    "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                    "-shortest", "-y", output_path
                ]
            else:
                cmd = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path, "-i", audio_path,
                    "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                    "-shortest", "-y", output_path
                ]

            subprocess.run(cmd, check=True, capture_output=True, text=True, env=ffmpeg_env)

        # --- Verify output and build the success report ---
        if os.path.exists(output_path):
            file_size = os.path.getsize(output_path) / (1024 * 1024)
            success_msg = f"✅ Video created successfully!\n\n"
            success_msg += f"📊 Size: {file_size:.2f} MB | Duration: {audio_duration:.2f}s\n"
            success_msg += f"🎨 Quality: CRF {crf_quality} | FPS: {video_fps:.2f}\n"
            if has_reddit_template:
                success_msg += f"📱 Reddit: ✅ ({first_sub_start:.1f}-{first_sub_end:.1f}s)\n"
            if has_book_cover:
                success_msg += f"📚 Book: ✅ (Fade: 60→75%, Hold: 75→90%, Book: 90→100%)\n"
            success_msg += "\n" + status_msg
            return output_path, success_msg
        else:
            return None, "❌ Output file was not created"

    except Exception as e:
        return None, f"❌ Error: {str(e)}"
 
672
 
673
# Gradio UI
with gr.Blocks(title="Video Stitcher", theme=gr.themes.Soft()) as app:
    # Header: surfaces the active CONFIG values so users can see what styling
    # will be applied without reading the source.
    gr.Markdown(
        f"""
# 🎬 Video Stitcher with Reddit Overlay & Book Promo ✨

**Current Configuration:**
- 📱 Reddit text: {REDDIT_CONFIG['font_file']} ({REDDIT_CONFIG['font_size_max']}-{REDDIT_CONFIG['font_size_min']}px)
- 💬 Subtitle: {SUBTITLE_CONFIG['font_name']} ({SUBTITLE_CONFIG['font_size_default']}px)
- 🎨 Fade color: RGB{VIDEO_CONFIG['fade_color_rgb']}

**To customize:** Edit CONFIG dictionaries at top of script
"""
    )

    with gr.Row():
        with gr.Column():
            # Each media source accepts an upload OR a URL (enforced in
            # validate_and_get_file: exactly one of the two must be set).
            gr.Markdown("### 📹 Video")
            with gr.Group():
                video_input = gr.File(label="Upload", file_types=[".mp4", ".mov", ".avi", ".mkv"], type="filepath")
                gr.Markdown("**OR**")
                video_url_input = gr.Textbox(label="URL", placeholder="https://example.com/video.mp4")

            gr.Markdown("### 🎵 Audio")
            with gr.Group():
                audio_input = gr.File(label="Upload", file_types=[".wav", ".mp3", ".aac", ".m4a"], type="filepath")
                gr.Markdown("**OR**")
                audio_url_input = gr.Textbox(label="URL", placeholder="https://example.com/audio.wav")

            gr.Markdown("### 📝 Subtitle")
            with gr.Group():
                subtitle_input = gr.File(label="Upload (.srt)", file_types=[".srt"], type="filepath")
                gr.Markdown("**OR**")
                subtitle_url_input = gr.Textbox(label="URL", placeholder="https://example.com/subtitles.srt")

            # Optional Google Books volume ID; enables the promo ending.
            gr.Markdown("### 📚 Book Cover (Optional)")
            book_id_input = gr.Textbox(label="Google Books ID", placeholder="wyaEDwAAQBAJ")

            gr.Markdown("### ✨ Settings")
            enable_highlight = gr.Checkbox(label="Word Highlighting", value=True)
            highlight_color = gr.Dropdown(choices=['yellow', 'orange', 'green', 'cyan', 'pink', 'red', 'blue'], value='yellow', label="Color")
            font_size = gr.Slider(12, 32, 18, step=2, label="Font Size")
            # x264 CRF: lower = better quality / bigger file.
            crf_input = gr.Slider(18, 28, 23, step=1, label="Quality (CRF)")

            stitch_btn = gr.Button("🎬 Stitch Video", variant="primary", size="lg")

        with gr.Column():
            gr.Markdown("### 📊 Output")
            status_output = gr.Textbox(label="Status", lines=14)
            video_output = gr.Video(label="Result")

    # Deployment notes: where the optional template/font assets are expected.
    gr.Markdown(
        """
### 📁 File Structure:
```
project/
├── app.py
├── fonts/ (optional - for HF deployment)
│   └── komiko_axis.ttf
├── reddit_template.png (optional)
└── komiko_axis.ttf (or in fonts/)
```
"""
    )
737
 
738
  stitch_btn.click(
739
  fn=stitch_media,
 
744
  )
745
 
746
# Entry point: launch the Gradio app; show_error surfaces tracebacks in the UI.
if __name__ == "__main__":
    app.launch(show_error=True)