sampleacc-3003 commited on
Commit
80da1f0
Β·
verified Β·
1 Parent(s): e22275a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +634 -432
app.py CHANGED
@@ -20,46 +20,123 @@ from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
20
  from fastapi.responses import FileResponse, JSONResponse
21
  from fastapi.middleware.cors import CORSMiddleware
22
  from pydantic import BaseModel, Field
 
23
  # ========================================
24
  # CONFIGURATION SECTION - CUSTOMIZE HERE
25
  # ========================================
26
 
27
  REDDIT_CONFIG = {
28
- 'template_file': 'reddit_template.png',
29
- 'font_file': 'RFDewi-Bold.ttf',
30
- 'font_size_max': 180,
31
- 'font_size_min': 16,
32
- 'text_wrap_width': 35,
33
- 'text_color': 'black',
34
- 'line_spacing': 10,
35
- 'text_box_width_percent': 0.85,
36
- 'text_box_height_percent': 0.65,
37
- 'y_offset': 20,
 
38
  }
39
 
40
  SUBTITLE_CONFIG = {
41
- 'font_file': 'LilitaOne-Regular.ttf',
42
- 'font_name': 'Lilita One',
43
- 'font_size_default': 11,
44
- 'position_alignment': 5,
45
- 'margin_left': 70,
46
- 'margin_right': 80,
47
- 'margin_vertical': 20,
48
- 'line_spacing': 2
 
49
  }
50
 
51
  VIDEO_CONFIG = {
52
- 'reddit_scale_percent': 0.75,
53
- 'fade_start_percent': 0.70,
54
- 'fade_end_percent': 0.85,
55
- 'promo_percent': 0.094,
56
- 'fade_color_rgb': (218, 207, 195),
57
  }
58
 
59
  # ========================================
60
  # END CONFIGURATION SECTION
61
  # ========================================
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # =========================
64
  # HELPER FUNCTIONS
65
  # =========================
@@ -74,154 +151,199 @@ def sec_to_ass_time(seconds):
74
  return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
75
 
76
  def setup_custom_fonts_hf(temp_dir):
77
- try:
78
- fonts_dir = os.path.join(temp_dir, 'fonts')
79
- os.makedirs(fonts_dir, exist_ok=True)
80
- script_dir = os.path.dirname(os.path.abspath(__file__))
81
- repo_fonts_dir = os.path.join(script_dir, 'fonts')
82
- fonts_to_copy = []
83
- if os.path.exists(repo_fonts_dir):
84
- for font_file in os.listdir(repo_fonts_dir):
85
- if font_file.endswith(('.ttf', '.otf', '.TTF', '.OTF')):
86
- fonts_to_copy.append(os.path.join(repo_fonts_dir, font_file))
87
- for item in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
88
- font_path = os.path.join(script_dir, item)
89
- if os.path.exists(font_path) and font_path not in fonts_to_copy:
90
- fonts_to_copy.append(font_path)
91
- for src in fonts_to_copy:
92
- dst = os.path.join(fonts_dir, os.path.basename(src))
93
- shutil.copy(src, dst)
94
- if fonts_to_copy:
95
- fonts_conf = f"""<?xml version="1.0"?>
96
- <fontconfig><dir>{fonts_dir}</dir><cachedir>{temp_dir}/cache</cachedir></fontconfig>"""
97
- conf_path = os.path.join(temp_dir, 'fonts.conf')
98
- with open(conf_path, 'w') as f:
99
- f.write(fonts_conf)
100
- env = os.environ.copy()
101
- env['FONTCONFIG_FILE'] = conf_path
102
- env['FONTCONFIG_PATH'] = temp_dir
103
- return env
104
- return os.environ.copy()
105
- except Exception as e: return os.environ.copy()
 
 
 
 
 
 
 
106
 
107
  def download_file_from_url(url, output_dir, filename):
108
- try:
109
- response = requests.get(url, stream=True, timeout=30)
110
- response.raise_for_status()
111
- file_path = os.path.join(output_dir, filename)
112
- with open(file_path, 'wb') as f:
113
- for chunk in response.iter_content(chunk_size=8192): f.write(chunk)
114
- return file_path
115
- except Exception as e: raise Exception(f"Failed to download file: {str(e)}")
 
 
116
 
117
  def download_book_cover(book_id, output_dir):
118
- try:
119
- image_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}"
120
- response = requests.get(image_url, timeout=30)
121
- response.raise_for_status()
122
- image_path = os.path.join(output_dir, 'book_cover.png')
123
- with open(image_path, 'wb') as f: f.write(response.content)
124
- Image.open(image_path).verify()
125
- return image_path
126
- except Exception as e: raise Exception(f"Failed to download book cover: {str(e)}")
 
 
127
 
128
  def decode_base64_image(base64_string, output_dir):
129
- try:
130
- if ',' in base64_string and 'base64' in base64_string:
131
- base64_string = base64_string.split(',', 1)[1]
132
- image_data = base64.b64decode(base64_string.strip())
133
- Image.open(BytesIO(image_data)).verify()
134
- output_path = os.path.join(output_dir, f"book_cover_b64_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png")
135
- Image.open(BytesIO(image_data)).save(output_path, 'PNG')
136
- return output_path
137
- except Exception as e: raise Exception(f"Base64 decode failed: {str(e)}")
 
138
 
139
  def validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir):
140
- has_file = book_cover_file is not None
141
- has_url = bool(book_cover_url and book_cover_url.strip())
142
- has_base64 = bool(book_cover_base64 and book_cover_base64.strip())
143
- has_id = bool(book_id and book_id.strip())
144
- methods_count = sum([has_file, has_url, has_base64, has_id])
145
- if methods_count == 0: return None, None
146
- if methods_count > 1: return None, "❌ Book Cover: Use only ONE method"
147
- try:
148
- if has_file: return str(book_cover_file.name if hasattr(book_cover_file, 'name') else book_cover_file), None
149
- if has_url: return download_file_from_url(book_cover_url.strip(), temp_dir, f"book_cover_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"), None
150
- if has_base64: return decode_base64_image(book_cover_base64.strip(), temp_dir), None
151
- if has_id: return download_book_cover(book_id.strip(), temp_dir), None
152
- except Exception as e: return None, f"❌ Book cover error: {str(e)}"
153
- return None, None
 
 
 
 
 
 
 
154
 
155
  def get_video_info(video_path):
156
- try:
157
- cmd_res = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path]
158
- result = subprocess.run(cmd_res, capture_output=True, text=True, check=True)
159
- width, height = result.stdout.strip().split('x')
160
- cmd_fps = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1", video_path]
161
- result = subprocess.run(cmd_fps, capture_output=True, text=True, check=True)
162
- fps_str = result.stdout.strip()
163
- fps = float(fps_str.split('/')[0]) / float(fps_str.split('/')[1]) if '/' in fps_str else float(fps_str)
164
- return int(width), int(height), fps
165
- except Exception as e: raise Exception(f"Failed to get video info: {str(e)}")
 
166
 
167
  def get_audio_duration(audio_path):
168
- try:
169
- cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", audio_path]
170
- result = subprocess.run(cmd, capture_output=True, text=True, check=True)
171
- return float(result.stdout.strip())
172
- except Exception as e: raise Exception(f"Failed to get audio duration: {str(e)}")
 
173
 
174
  def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
175
- try:
176
- template = Image.open(template_path).convert('RGBA')
177
- temp_w, temp_h = template.size
178
- box_w = int(temp_w * config['text_box_width_percent'])
179
- box_h = int(temp_h * config['text_box_height_percent'])
180
- script_dir = os.path.dirname(os.path.abspath(__file__))
181
- font_paths = [os.path.join(script_dir, 'fonts', config['font_file']), os.path.join(script_dir, config['font_file'])]
182
- best_font_size = config['font_size_max']
183
- best_wrapped_text = hook_text
184
- for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
185
- font = None
186
- for fp in font_paths:
187
- if os.path.exists(fp):
188
- try: font = ImageFont.truetype(fp, font_size); break
189
- except: pass
190
- if font is None: font = ImageFont.load_default()
191
- wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])
192
- draw = ImageDraw.Draw(template)
193
- bbox = draw.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
194
- if (bbox[2]-bbox[0] <= box_w and bbox[3]-bbox[1] <= box_h):
195
- best_font_size = font_size; best_wrapped_text = wrapped; break
196
- font = None
197
- for fp in font_paths:
198
- if os.path.exists(fp):
199
- try: font = ImageFont.truetype(fp, best_font_size); break
200
- except: pass
201
- if font is None: font = ImageFont.load_default()
202
- draw = ImageDraw.Draw(template)
203
- bbox = draw.multiline_textbbox((0, 0), best_wrapped_text, font=font, spacing=config['line_spacing'])
204
- x = (temp_w - (bbox[2]-bbox[0])) / 2
205
- y = (temp_h - (bbox[3]-bbox[1])) / 2 + config['y_offset']
206
- draw.multiline_text((x, y), best_wrapped_text, fill=config['text_color'], font=font, spacing=config['line_spacing'], align='left')
207
- output_path = os.path.join(output_dir, 'reddit_card_composite.png')
208
- template.save(output_path, 'PNG')
209
- return output_path
210
- except Exception as e: raise Exception(f"Failed to create Reddit card: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
- def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
213
- has_upload = uploaded_file is not None
214
- has_url = url_string and url_string.strip()
215
- if not has_upload and not has_url: return None, f"❌ Please provide {file_type}"
216
- if has_upload and has_url: return None, f"❌ Use only ONE method for {file_type}"
217
- if has_upload: return str(uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file), None
218
- if has_url:
219
- try:
220
- fname = f"{file_type}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{url_string.split('/')[-1] if url_string.split('/')[-1] else 'file'}"
221
- return download_file_from_url(url_string.strip(), temp_dir, fname), None
222
- except Exception as e: return None, f"❌ Error downloading {file_type}: {str(e)}"
223
- return None, f"❌ Unknown error"
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  # ============================================
227
  # JSON LOGIC: PARSERS & SUBTITLE GENERATORS
@@ -232,50 +354,47 @@ def extract_first_subtitle(json_path):
232
  try:
233
  with open(json_path, 'r', encoding='utf-8') as f:
234
  data = json.load(f)
235
-
236
  title_words = []
237
  start_time = None
238
  end_time = 3.0
239
-
240
  for segment in data.get('segments', []):
241
  for word_data in segment.get('words', []):
242
  word_text = word_data.get('text', '').strip()
243
- if not word_text: continue
244
-
 
245
  if start_time is None:
246
  start_time = word_data.get('start_time', 0.0)
247
-
248
  title_words.append(word_text)
249
-
250
- # Check if this word ends with sentence-ending punctuation
251
  if re.search(r'[.!?]$', word_text):
252
  end_time = word_data.get('end_time', 3.0)
253
  return " ".join(title_words), start_time, end_time
254
-
255
- # Fallback just in case there is literally no punctuation
256
  if title_words:
257
  return " ".join(title_words), start_time, end_time
258
  return "No subtitle found", 0.0, 3.0
259
- except Exception as e:
260
  print(f"Error extracting first subtitle: {e}")
261
  return "No subtitle found", 0.0, 3.0
262
 
263
- # ============================================
264
- # FINDS BOOK TITLE WORD'S EXACT TIMINGS
265
- # ============================================
266
  def find_title_and_cta(json_path, book_title):
267
  """Uses a sliding window to find the exact start and end millisecond of the book title."""
268
  try:
269
- if not book_title or not book_title.strip(): return None, None
270
-
271
- with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
272
-
 
 
273
  book_title_lower = book_title.lower()
274
  title_clean = re.sub(r'[^\w\s]', '', book_title_lower).strip()
275
  book_title_words = title_clean.split()
276
  window_size = len(book_title_words)
277
-
278
- # Flatten all words with their timings
279
  all_words = []
280
  for segment in data.get('segments', []):
281
  for word_data in segment.get('words', []):
@@ -286,60 +405,68 @@ def find_title_and_cta(json_path, book_title):
286
  'start': word_data.get('start_time', 0.0),
287
  'end': word_data.get('end_time', 0.0)
288
  })
289
-
290
  best_score = 0
291
  best_start = None
292
  best_end = None
293
-
294
- # Sliding Window: Checks 2, 3, and 4 word groups to catch fuzzy/bad transcriptions
295
  for w_size in [window_size, window_size + 1, window_size - 1]:
296
- if w_size <= 0: continue
 
297
  for i in range(len(all_words) - w_size + 1):
298
- window_text = " ".join([w['text'] for w in all_words[i : i + w_size]]).lower()
299
  window_text_clean = re.sub(r'[^\w\s]', '', window_text).strip()
300
-
301
  score = fuzz.ratio(title_clean, window_text_clean)
302
  if score > best_score:
303
  best_score = score
304
  best_start = all_words[i]['start']
305
  best_end = all_words[i + w_size - 1]['end']
306
-
307
- # If it's a strong match, return exact start and end times
308
  if best_score >= 85:
309
  return best_start, best_end
310
-
311
  return None, None
312
  except Exception as e:
313
  print(f"Error finding title: {e}")
314
  return None, None
315
 
316
- def create_body_ass_from_json(json_path, output_dir, highlight_color='yellow',
317
- font_size=None, start_time_sec=0.0, config=SUBTITLE_CONFIG,
318
- stop_time_sec=None):
319
  """Creates dynamic body subtitles starting at 1 word and increasing by 2 up to 50."""
320
- if font_size is None: font_size = config['font_size_default']
321
- color_map = {'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'), 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'), 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'), 'blue': ('&H00FF0000', '&H00FFFFFF')}
 
 
 
 
 
 
322
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
323
 
324
  ass_path = os.path.join(output_dir, 'body_subtitles.ass')
325
  ass_header = f"""[Script Info]
326
- Title: Body JSON Subtitles
327
- ScriptType: v4.00+
328
- [V4+ Styles]
329
- Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
330
- Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
331
- [Events]
332
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
333
 
334
- with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
 
335
 
336
  all_words = []
337
  for segment in data.get('segments', []):
338
  for word_data in segment.get('words', []):
339
  word_text = word_data.get('text', '').strip()
340
- start_ms = word_data.get('start_time', 0)
341
- if start_ms < start_time_sec - 0.1: continue
342
- if stop_time_sec is not None and start_ms >= stop_time_sec - 0.1: continue
 
 
343
  if word_text:
344
  all_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
345
 
@@ -347,11 +474,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\
347
  i = 0
348
  current_chunk_size = 1
349
  max_chunk_size = 50
350
-
351
  while i < len(all_words):
352
  remaining = len(all_words) - i
353
  take = min(current_chunk_size, remaining)
354
- chunks.append(all_words[i : i + take])
355
  i += take
356
  if current_chunk_size < max_chunk_size:
357
  current_chunk_size = min(current_chunk_size + 4, max_chunk_size)
@@ -362,20 +489,23 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\
362
  frame_end = chunk[-1]['end']
363
  for idx, info in enumerate(chunk):
364
  w_start = info['start']
365
- w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else frame_end
366
-
367
  text_parts = []
368
  for j, word_str in enumerate(chunk_text_only):
369
- if j == idx: text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{word_str}{{\\r}}")
370
- else: text_parts.append(word_str)
371
- ass_events.append(f"Dialogue: 0,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
372
-
373
- with open(ass_path, 'w', encoding='utf-8') as f:
 
 
 
374
  f.write(ass_header + '\n'.join(ass_events))
375
  return ass_path
376
 
377
-
378
- def create_cta_ass_from_json(json_path, output_dir, start_sec, font_size, video_width, video_height, highlight_color='yellow', config=SUBTITLE_CONFIG, words_per_frame=10):
379
  """Creates the chunky, Instagram-style box subtitles for the CTA."""
380
  color_map = {
381
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
@@ -384,47 +514,47 @@ def create_cta_ass_from_json(json_path, output_dir, start_sec, font_size, video_
384
  'blue': ('&H00FF0000', '&H00FFFFFF')
385
  }
386
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
387
- margin_lr = int(video_width * 0.125) + 40
388
 
389
  ass_path = os.path.join(output_dir, 'cta_subtitles.ass')
390
- # Style logic: WrapStyle=1, BorderStyle=3, Outline=10 (Tight Instagram Box)
391
  ass_header = f"""[Script Info]
392
- Title: CTA JSON Subtitles
393
- ScriptType: v4.00+
394
- PlayResX: {video_width}
395
- PlayResY: {video_height}
396
- WrapStyle: 1
397
- [V4+ Styles]
398
- Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
399
- Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,10,0,5,{margin_lr},{margin_lr},0,1
400
- [Events]
401
- Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
402
-
403
- with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f)
 
404
 
405
  all_cta_words = []
406
  for segment in data.get('segments', []):
407
  for word_data in segment.get('words', []):
408
  word_text = word_data.get('text', '').strip()
409
  start_ms = word_data.get('start_time', 0)
410
- if start_ms < start_sec - 0.1: continue # Skip words before the CTA starts
 
411
  if word_text:
412
- # Merge "Book" and "Access" into "BookXcess"
413
- if word_text.lower().startswith('access') and len(all_cta_words) > 0 and all_cta_words[-1]['word'].lower() == 'book':
414
- # Keep any trailing punctuation (like commas or periods) from "Access"
415
  punctuation = word_text[6:]
416
  all_cta_words[-1]['word'] = 'BookXcess' + punctuation
417
- # Extend the highlight time to cover both words
418
  all_cta_words[-1]['end'] = word_data.get('end_time', 0)
419
- continue # Skip adding "Access" as a separate word
420
- all_cta_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
 
421
 
422
  chunks = []
423
  i = 0
424
  while i < len(all_cta_words):
425
  remaining = len(all_cta_words) - i
426
  take = remaining if words_per_frame < remaining <= words_per_frame + 2 else min(words_per_frame, remaining)
427
- chunks.append(all_cta_words[i : i + take])
428
  i += take
429
 
430
  ass_events = []
@@ -433,15 +563,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\
433
  frame_end = chunk[-1]['end']
434
  for idx, info in enumerate(chunk):
435
  w_start = info['start']
436
- w_end = chunk[idx+1]['start'] if idx + 1 < len(chunk) else frame_end
437
-
438
  text_parts = []
439
  for j, word_str in enumerate(chunk_text_only):
440
- if j == idx: text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
441
- else: text_parts.append(word_str)
442
- ass_events.append(f"Dialogue: 1,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
443
-
444
- with open(ass_path, 'w', encoding='utf-8') as f:
 
 
 
445
  f.write(ass_header + '\n'.join(ass_events))
446
  return ass_path
447
 
@@ -449,182 +582,209 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\
449
  # MAIN STITCH FUNCTION
450
  # =========================
451
 
452
- def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, subtitle_url, book_cover_file, book_cover_url, book_cover_base64, book_id, book_title, enable_highlight, highlight_color, font_size, crf_quality=23):
453
- temp_dir = tempfile.mkdtemp()
454
- status_msg = "πŸš€ Starting video stitching...\n"
455
- try:
456
- ffmpeg_env = setup_custom_fonts_hf(temp_dir)
457
- video_path, v_err = validate_and_get_file(video_file, video_url, 'video', temp_dir)
458
- if v_err: return None, v_err
459
- audio_path, a_err = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
460
- if a_err: return None, a_err
461
- subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
462
- if s_err: return None, s_err
463
-
464
- # ✨ PRE-PROCESS SPEED HACK ✨
465
- speed_factor = 1.3
466
-
467
- # 1. Physically speed up the audio file
468
- fast_audio = os.path.join(temp_dir, f"fast_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3")
469
- subprocess.run(["ffmpeg", "-v", "error", "-y", "-i", audio_path, "-filter:a", f"atempo={speed_factor}", fast_audio], check=True)
470
- audio_path = fast_audio # Trick the script into using the fast audio!
471
-
472
- # 2. Physically shrink the JSON timestamps
473
- fast_json = os.path.join(temp_dir, f"fast_subs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
474
- with open(subtitle_path, 'r', encoding='utf-8') as f: json_data = json.load(f)
475
-
476
- for segment in json_data.get('segments', []):
477
- segment['start_time'] = segment.get('start_time', 0) / speed_factor
478
- segment['end_time'] = segment.get('end_time', 0) / speed_factor
479
- for word in segment.get('words', []):
480
- word['start_time'] = word.get('start_time', 0) / speed_factor
481
- word['end_time'] = word.get('end_time', 0) / speed_factor
482
-
483
- with open(fast_json, 'w', encoding='utf-8') as f: json.dump(json_data, f)
484
- subtitle_path = fast_json # Trick the script into using the fast subtitles!
485
-
486
- video_width, video_height, video_fps = get_video_info(video_path)
487
- audio_duration = get_audio_duration(audio_path) # Now gets the new 1:18 duration natively!
488
-
489
- script_dir = os.path.dirname(os.path.abspath(__file__))
490
- reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
491
- has_reddit_template = os.path.exists(reddit_template_path)
492
-
493
- first_sub_start = 0
494
- first_sub_end = 0
495
- if has_reddit_template:
496
- try:
497
- first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
498
- status_msg += f"\nπŸ“± Reddit Overlay: '{first_sub_text[:30]}...'\n"
499
- reddit_card_path = create_reddit_card_with_text(reddit_template_path, first_sub_text, temp_dir, REDDIT_CONFIG)
500
- except Exception as e:
501
- status_msg += f" β€’ ⚠️ Reddit card failed: {str(e)}\n"
502
- has_reddit_template = False
503
-
504
- # --- 1. Find Title Exact Word Timings ---
505
- title_start, title_end = find_title_and_cta(subtitle_path, book_title)
506
-
507
- book_appears_at = title_start if title_start is not None else audio_duration * (1 - VIDEO_CONFIG['promo_percent'])
508
- box_appears_at = title_end if title_end is not None else book_appears_at + 1.5
509
-
510
- if title_start is not None:
511
- status_msg += f"\nπŸ“– Hard cut to Book Cover at {title_start:.2f}s\n"
512
- status_msg += f"🀫 Book title silenced in subtitles.\n"
513
- status_msg += f"πŸ–€ CTA text starts exactly at {title_end:.2f}s\n"
514
-
515
- # --- 2. Prepare Dynamic CTA Text (JSON) ---
516
- status_msg += "πŸ–€ Generating Instagram-style dynamic CTA...\n"
517
- cta_font_size = int(video_width * 0.060)
518
-
519
- cta_ass_path = create_cta_ass_from_json(
520
- subtitle_path, temp_dir, box_appears_at,
521
- cta_font_size, video_width, video_height, highlight_color
522
- )
523
- cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')
524
-
525
- # --- 3. Process Main Subtitles (JSON) ---
526
- if enable_highlight:
527
- status_msg += f"\n✨ Processing JSON subtitles...\n"
528
- body_start_time = first_sub_end if has_reddit_template else 0.0
529
- main_subtitle_path = create_body_ass_from_json(
530
- subtitle_path, temp_dir, highlight_color, font_size,
531
- start_time_sec=body_start_time, config=SUBTITLE_CONFIG,
532
- stop_time_sec=book_appears_at # Stops EXACTLY before the title is spoken
533
- )
534
- else:
535
- main_subtitle_path = subtitle_path
536
-
537
- main_sub_escaped = main_subtitle_path.replace('\\', '/').replace(':', '\\:')
538
-
539
- book_cover_path, book_error = validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir)
540
- if book_error: return None, book_error
541
- has_book_cover = book_cover_path is not None
542
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
543
- output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")
544
-
545
- if has_book_cover:
546
- try:
547
- fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
548
- fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
549
-
550
- # Safety net: If the book title is spoken BEFORE the fade is supposed to end,
551
- # we shorten the fade so it doesn't overlap the book cover cut.
552
- if fade_ends_at > book_appears_at:
553
- fade_ends_at = book_appears_at
554
- fade_starts_at = min(fade_starts_at, fade_ends_at - 1.0)
555
-
556
- fade_out_duration = fade_ends_at - fade_starts_at
557
- solid_color_duration = max(0, book_appears_at - fade_ends_at)
558
-
559
- main_video_duration = fade_ends_at
560
- cover_segment_duration = audio_duration - book_appears_at
561
- fade_color_hex = "#dacfc3" # Book page type color
562
-
563
- # 1. Main Segment (background video fading into sandal color)
564
- main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
565
- cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path]
566
- subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)
567
-
568
- # 2. Solid Color Segment (Holds the sandal color until the hard cut)
569
- solid_color_path = None
570
- if solid_color_duration > 0:
571
- solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
572
- cmd_solid = ["ffmpeg", "-f", "lavfi", "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path]
573
- subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)
574
-
575
- # 3. Book Cover Segment (Hard cut triggered exactly when title is spoken)
576
- cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
577
- cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
578
- subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)
579
-
580
- # 4. Stitch them all together
581
- concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
582
- with open(concat_list_path, 'w') as f:
583
- f.write(f"file '{main_segment_path}'\n")
584
- if solid_color_path:
585
- f.write(f"file '{solid_color_path}'\n")
586
- f.write(f"file '{cover_segment_path}'\n")
587
-
588
- #--- 5. Build the Filter Graph (Subtitles, Overlays & SPEEDUP) ---
589
- input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
590
- curr_idx = 1
591
- curr_stream = "[0:v]"
592
-
593
- if has_reddit_template:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  input_cmd += ["-loop", "1", "-i", reddit_card_path]
595
  filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
596
  curr_stream, curr_idx = "[v1]", curr_idx + 1
597
- else:
598
- filter_complex = f"{curr_stream}copy[v1];"; curr_stream = "[v1]"
599
-
600
- # 1. Burn in Main Subtitles
601
- filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"; curr_stream = "[v2]"
602
-
603
- # 2. Burn in CTA Subtitles (Straight to v_final - NO DUPLICATES)
604
- if cta_ass_path:
605
- filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
606
- else:
607
- filter_complex += f"{curr_stream}copy[v_final]"
608
-
609
- input_cmd += ["-i", audio_path]
610
-
611
- cmd_final = input_cmd + [
612
- "-filter_complex", filter_complex,
613
- "-map", "[v_final]", "-map", f"{curr_idx}:a",
614
- "-c:v", "libx264", "-crf", str(crf_quality),
615
- "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
616
- ]
617
-
618
- status_msg += "🎬 Rendering final synchronized video...\n"
619
- subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
620
- except Exception as e:
621
- return None, f"❌ Book cover processing error: {str(e)}"
622
-
623
- if os.path.exists(output_path): return output_path, f"βœ… Success!"
624
- else: return None, "❌ Output not created"
625
- except Exception as e: return None, f"❌ Error: {str(e)}"
626
-
627
-
 
 
 
 
 
 
 
 
628
  app = FastAPI(title="Video Stitcher API")
629
 
630
  app.add_middleware(
@@ -647,31 +807,69 @@ def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
647
  f.write(upload_file.file.read())
648
  return dest_path
649
 
650
- @app.post('/video_stitch', responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
  async def stitch_upload(
652
- request: Request,
653
- video_file: Optional[UploadFile] = File(None),
654
- video_url: Optional[str] = Form(None),
655
- audio_file: Optional[UploadFile] = File(None),
656
- audio_url: Optional[str] = Form(None),
657
- subtitle_file: Optional[UploadFile] = File(None),
658
- subtitle_url: Optional[str] = Form(None),
659
- book_cover_file: Optional[UploadFile] = File(None),
660
- book_cover_url: Optional[str] = Form(None),
661
- book_cover_base64: Optional[str] = Form(None),
662
- book_id: Optional[str] = Form(None),
663
- book_title: Optional[str] = Form(None),
664
- enable_highlight: bool = Form(True),
665
- highlight_color: str = Form('yellow'),
666
- font_size: int = Form(10),
667
- crf_quality: int = Form(23),
668
- ):
669
  # Format validation
670
  if subtitle_file and not subtitle_file.filename.endswith('.json'):
671
  raise HTTPException(status_code=422, detail="❌ Subtitle must be a .json file")
672
  if subtitle_url and not subtitle_url.strip().split('?')[0].endswith('.json'):
673
  raise HTTPException(status_code=422, detail="❌ Subtitle URL must point to a .json file")
674
- if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/mp3", "audio/wav", "audio/x-wav", "audio/aac", "audio/mp4", "audio/x-m4a"}:
 
675
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
676
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
677
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
@@ -743,4 +941,8 @@ async def stitch_upload(
743
 
744
  @app.get('/health')
745
  async def health():
746
- return {"status": "ok"}
 
 
 
 
 
20
  from fastapi.responses import FileResponse, JSONResponse
21
  from fastapi.middleware.cors import CORSMiddleware
22
  from pydantic import BaseModel, Field
23
+
24
  # ========================================
25
  # CONFIGURATION SECTION - CUSTOMIZE HERE
26
  # ========================================
27
 
28
  REDDIT_CONFIG = {
29
+ 'template_file': 'reddit_template.png',
30
+ 'font_file': 'RFDewi-Bold.ttf',
31
+ 'font_size_max': 180,
32
+ 'font_size_min': 16,
33
+ 'text_wrap_width': 35,
34
+ 'text_color': 'black',
35
+ 'line_spacing': 10,
36
+ 'text_box_width_percent': 0.85,
37
+ 'text_box_height_percent': 0.65,
38
+ 'y_offset': 20,
39
+ '_resolved_font_path': None,
40
  }
41
 
42
  SUBTITLE_CONFIG = {
43
+ 'font_file': 'LilitaOne-Regular.ttf',
44
+ 'font_name': 'Lilita One',
45
+ 'font_size_default': 11,
46
+ 'position_alignment': 5,
47
+ 'margin_left': 70,
48
+ 'margin_right': 80,
49
+ 'margin_vertical': 20,
50
+ 'line_spacing': 2,
51
+ '_resolved_font_path': None,
52
  }
53
 
54
  VIDEO_CONFIG = {
55
+ 'reddit_scale_percent': 0.75,
56
+ 'fade_start_percent': 0.70,
57
+ 'fade_end_percent': 0.85,
58
+ 'promo_percent': 0.094,
59
+ 'fade_color_rgb': (218, 207, 195),
60
  }
61
 
62
  # ========================================
63
  # END CONFIGURATION SECTION
64
  # ========================================
65
 
66
+ # =========================
67
+ # FONT RESOLUTION HELPERS
68
+ # =========================
69
+
70
def get_all_font_paths(font_filename):
    """Return every candidate location for *font_filename*, deduped, in priority order."""
    here = os.path.dirname(os.path.abspath(__file__))
    workdir = os.getcwd()

    # Priority order: next to this script, then the working directory,
    # then the well-known container install prefixes used by HF Spaces.
    candidates = [
        os.path.join(here, 'fonts', font_filename),
        os.path.join(here, font_filename),
        os.path.join(workdir, 'fonts', font_filename),
        os.path.join(workdir, font_filename),
        f"/home/user/app/{font_filename}",
        f"/home/user/app/fonts/{font_filename}",
        f"/app/{font_filename}",
        f"/app/fonts/{font_filename}",
    ]

    # Normalize each path and drop duplicates while keeping first-seen order.
    return list(dict.fromkeys(os.path.normpath(c) for c in candidates))
94
+
95
def load_font_safe(font_paths, size):
    """Attempts to load a font from a list of paths with full logging."""
    for candidate in font_paths:
        present = os.path.exists(candidate)
        print(f"🔍 Checking font path: {candidate} → exists: {present}")
        if not present:
            continue
        nbytes = os.path.getsize(candidate)
        print(f" 📦 File size: {nbytes} bytes")
        if nbytes < 1000:
            # A tiny file is usually a Git-LFS pointer, not real font data.
            print(f" ⚠️ Suspiciously small — possibly corrupted or LFS pointer")
        try:
            loaded = ImageFont.truetype(candidate, size)
        except Exception as e:
            print(f" ⚠️ Failed to load: {e}")
            continue
        print(f"✅ Loaded font: {candidate} at size {size}")
        return loaded, candidate

    print(f"❌ NO FONT FOUND at size {size} — falling back to default")
    return ImageFont.load_default(), None
114
+
115
def resolve_font_at_startup(font_filename, config_dict):
    """Resolves a font path once and stores it in the config."""
    banner = '=' * 50
    print(f"\n{banner}")
    print(f"🔎 Resolving font: {font_filename}")
    print(f"{banner}")

    # Size 40 is only a probe size; callers reload at their own size later.
    _, found = load_font_safe(get_all_font_paths(font_filename), 40)

    if found:
        config_dict['_resolved_font_path'] = found
        print(f"✅ Cached resolved path: {found}\n")
    else:
        config_dict['_resolved_font_path'] = None
        print(f"🚨 FONT NOT RESOLVED — will use Pillow default (tiny bitmap)\n")
129
+
130
def load_font_cached(config_dict, size):
    """Loads font from the pre-resolved path. Fast, no searching."""
    path = config_dict.get('_resolved_font_path')
    if not path or not os.path.exists(path):
        return ImageFont.load_default()
    try:
        return ImageFont.truetype(path, size)
    except Exception as e:
        # Resolved at startup but unreadable now — degrade instead of crashing.
        print(f"⚠️ Cached font failed at size {size}: {e}")
        return ImageFont.load_default()
139
+
140
  # =========================
141
  # HELPER FUNCTIONS
142
  # =========================
 
151
  return f"{h}:{m:02d}:{s:02d}.{cs:02d}"
152
 
153
def setup_custom_fonts_hf(temp_dir):
    """Stage bundled fonts into *temp_dir* and build a fontconfig env for ffmpeg.

    Collects .ttf/.otf files from the repo's fonts/ directory, the configured
    font files next to this script, and any paths resolved at startup, copies
    them into temp_dir/fonts, and writes a minimal fonts.conf pointing there.

    Returns a copy of os.environ, with FONTCONFIG_FILE/FONTCONFIG_PATH set when
    at least one font was staged. Never raises; falls back to a plain copy of
    the environment on any error.
    """
    try:
        fonts_dir = os.path.join(temp_dir, 'fonts')
        os.makedirs(fonts_dir, exist_ok=True)
        script_dir = os.path.dirname(os.path.abspath(__file__))
        repo_fonts_dir = os.path.join(script_dir, 'fonts')

        fonts_to_copy = []
        if os.path.exists(repo_fonts_dir):
            for font_file in os.listdir(repo_fonts_dir):
                if font_file.endswith(('.ttf', '.otf', '.TTF', '.OTF')):
                    fonts_to_copy.append(os.path.join(repo_fonts_dir, font_file))
        # The two fonts named in the configs may also sit next to the script.
        for item in [REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']]:
            font_path = os.path.join(script_dir, item)
            if os.path.exists(font_path) and font_path not in fonts_to_copy:
                fonts_to_copy.append(font_path)
        # Also copy from resolved paths cached at startup.
        for cfg in [REDDIT_CONFIG, SUBTITLE_CONFIG]:
            rp = cfg.get('_resolved_font_path')
            if rp and os.path.exists(rp) and rp not in fonts_to_copy:
                fonts_to_copy.append(rp)

        copied_any = False
        for src in fonts_to_copy:
            dst = os.path.join(fonts_dir, os.path.basename(src))
            try:
                shutil.copy(src, dst)
                copied_any = True
            except OSError as e:
                # Best-effort: one unreadable font must not abort the rest.
                print(f"⚠️ Could not copy font {src}: {e}")

        if copied_any:
            # fontconfig requires its cachedir to exist; create it up front.
            cache_dir = os.path.join(temp_dir, 'cache')
            os.makedirs(cache_dir, exist_ok=True)
            fonts_conf = f"""<?xml version="1.0"?>
<fontconfig><dir>{fonts_dir}</dir><cachedir>{cache_dir}</cachedir></fontconfig>"""
            conf_path = os.path.join(temp_dir, 'fonts.conf')
            with open(conf_path, 'w') as f:
                f.write(fonts_conf)
            env = os.environ.copy()
            env['FONTCONFIG_FILE'] = conf_path
            env['FONTCONFIG_PATH'] = temp_dir
            return env
        return os.environ.copy()
    except Exception as e:
        print(f"⚠️ Font setup error: {e}")
        return os.environ.copy()
190
 
191
def download_file_from_url(url, output_dir, filename):
    """Stream *url* to *output_dir*/*filename* and return the saved path.

    Raises Exception("Failed to download file: ...") on any network or I/O
    error, chaining the original cause for debuggability.
    """
    try:
        # Context-manage the response so the connection is released even on error.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            file_path = os.path.join(output_dir, filename)
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive empty chunks
                        f.write(chunk)
        return file_path
    except Exception as e:
        raise Exception(f"Failed to download file: {str(e)}") from e
202
 
203
def download_book_cover(book_id, output_dir):
    """Fetch the Google Books front cover for *book_id* into *output_dir*."""
    try:
        cover_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}"
        resp = requests.get(cover_url, timeout=30)
        resp.raise_for_status()
        target = os.path.join(output_dir, 'book_cover.png')
        with open(target, 'wb') as fh:
            fh.write(resp.content)
        # verify() confirms the payload really is an image, not an HTML error page.
        Image.open(target).verify()
        return target
    except Exception as e:
        raise Exception(f"Failed to download book cover: {str(e)}")
215
 
216
def decode_base64_image(base64_string, output_dir):
    """Decode a (possibly data-URI) base64 image, save it as PNG, return the path."""
    try:
        payload = base64_string
        # Strip a "data:image/png;base64," style prefix if present.
        if ',' in payload and 'base64' in payload:
            payload = payload.split(',', 1)[1]
        raw = base64.b64decode(payload.strip())
        # First open is verify-only (verify() invalidates the object); second saves.
        Image.open(BytesIO(raw)).verify()
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_path = os.path.join(output_dir, f"book_cover_b64_{stamp}.png")
        Image.open(BytesIO(raw)).save(output_path, 'PNG')
        return output_path
    except Exception as e:
        raise Exception(f"Base64 decode failed: {str(e)}")
227
 
228
def validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64, book_id, temp_dir):
    """Resolve the book cover from exactly one of four optional input methods.

    Returns (path, None) on success, (None, None) when no cover was given,
    and (None, error_message) when inputs conflict or resolution fails.
    """
    provided = {
        'file': book_cover_file is not None,
        'url': bool(book_cover_url and book_cover_url.strip()),
        'b64': bool(book_cover_base64 and book_cover_base64.strip()),
        'id': bool(book_id and book_id.strip()),
    }
    n_methods = sum(provided.values())
    if n_methods == 0:
        return None, None
    if n_methods > 1:
        return None, "❌ Book Cover: Use only ONE method"
    try:
        if provided['file']:
            src = book_cover_file.name if hasattr(book_cover_file, 'name') else book_cover_file
            return str(src), None
        if provided['url']:
            fname = f"book_cover_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
            return download_file_from_url(book_cover_url.strip(), temp_dir, fname), None
        if provided['b64']:
            return decode_base64_image(book_cover_base64.strip(), temp_dir), None
        if provided['id']:
            return download_book_cover(book_id.strip(), temp_dir), None
    except Exception as e:
        return None, f"❌ Book cover error: {str(e)}"
    return None, None
250
 
251
def get_video_info(video_path):
    """Probe *video_path* with ffprobe; return (width, height, fps)."""
    try:
        res_probe = subprocess.run(
            ["ffprobe", "-v", "error", "-select_streams", "v:0",
             "-show_entries", "stream=width,height", "-of", "csv=s=x:p=0", video_path],
            capture_output=True, text=True, check=True)
        width, height = res_probe.stdout.strip().split('x')

        fps_probe = subprocess.run(
            ["ffprobe", "-v", "error", "-select_streams", "v:0",
             "-show_entries", "stream=r_frame_rate",
             "-of", "default=noprint_wrappers=1:nokey=1", video_path],
            capture_output=True, text=True, check=True)
        rate = fps_probe.stdout.strip()
        # r_frame_rate is usually a rational like "30000/1001".
        if '/' in rate:
            parts = rate.split('/')
            fps = float(parts[0]) / float(parts[1])
        else:
            fps = float(rate)
        return int(width), int(height), fps
    except Exception as e:
        raise Exception(f"Failed to get video info: {str(e)}")
263
 
264
def get_audio_duration(audio_path):
    """Return the duration of *audio_path* in seconds, probed with ffprobe."""
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_path,
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        return float(probe.stdout.strip())
    except Exception as e:
        raise Exception(f"Failed to get audio duration: {str(e)}")
271
 
272
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """Render *hook_text* onto the Reddit card template and save the composite PNG.

    Searches downward (step -2) from font_size_max to font_size_min for the
    largest font whose wrapped text fits the configured text box, then draws
    the text centered on the template (shifted by y_offset).

    Returns the saved PNG path; wraps any failure in Exception.
    """
    try:
        template = Image.open(template_path).convert('RGBA')
        temp_w, temp_h = template.size
        print(f"📏 Reddit template size: {temp_w}x{temp_h}")

        # Usable text area as a fraction of the template dimensions.
        box_w = int(temp_w * config['text_box_width_percent'])
        box_h = int(temp_h * config['text_box_height_percent'])
        print(f"📏 Text box: {box_w}x{box_h}")

        # Use scratch image for measurement instead of drawing on template
        scratch = Image.new('RGBA', (1, 1))
        draw_scratch = ImageDraw.Draw(scratch)

        best_font_size = config['font_size_min']
        best_wrapped_text = hook_text
        best_font = None

        # First (largest) size whose wrapped text fits both dimensions wins.
        for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
            font = load_font_cached(config, font_size)

            wrapped = textwrap.fill(hook_text, width=config['text_wrap_width'])
            bbox = draw_scratch.multiline_textbbox((0, 0), wrapped, font=font, spacing=config['line_spacing'])
            text_w = bbox[2] - bbox[0]
            text_h = bbox[3] - bbox[1]

            if text_w <= box_w and text_h <= box_h:
                best_font_size = font_size
                best_wrapped_text = wrapped
                best_font = font
                break

        # Nothing fit: fall back to the minimum size (text may overflow the box).
        if best_font is None:
            best_font = load_font_cached(config, config['font_size_min'])
            best_wrapped_text = textwrap.fill(hook_text, width=config['text_wrap_width'])

        print(f"🔀 Final font size chosen: {best_font_size}")
        print(f"🔀 Font object type: {type(best_font)}")
        print(f"🔀 Text preview: {best_wrapped_text[:50]}...")

        draw = ImageDraw.Draw(template)
        bbox = draw.multiline_textbbox((0, 0), best_wrapped_text, font=best_font, spacing=config['line_spacing'])
        # Center horizontally and vertically, then nudge down by y_offset.
        x = (temp_w - (bbox[2] - bbox[0])) / 2
        y = (temp_h - (bbox[3] - bbox[1])) / 2 + config['y_offset']

        print(f"📍 Drawing at position: ({x:.0f}, {y:.0f})")

        draw.multiline_text(
            (x, y), best_wrapped_text, fill=config['text_color'],
            font=best_font, spacing=config['line_spacing'], align='left'
        )

        output_path = os.path.join(output_dir, 'reddit_card_composite.png')
        template.save(output_path, 'PNG')
        print(f"💾 Reddit card saved: {output_path}")
        return output_path
    except Exception as e:
        raise Exception(f"Failed to create Reddit card: {str(e)}")
 
 
 
 
 
 
330
 
331
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Resolve one input source (upload XOR url) to a local file path.

    Returns (path, None) on success or (None, error_message) on failure.
    """
    upload_given = uploaded_file is not None
    url_given = url_string and url_string.strip()

    if not upload_given and not url_given:
        return None, f"❌ Please provide {file_type}"
    if upload_given and url_given:
        return None, f"❌ Use only ONE method for {file_type}"

    if upload_given:
        local = uploaded_file.name if hasattr(uploaded_file, 'name') else uploaded_file
        return str(local), None

    if url_given:
        try:
            tail = url_string.split('/')[-1] if url_string.split('/')[-1] else 'file'
            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            fname = f"{file_type}_{stamp}_{tail}"
            return download_file_from_url(url_string.strip(), temp_dir, fname), None
        except Exception as e:
            return None, f"❌ Error downloading {file_type}: {str(e)}"

    return None, f"❌ Unknown error"
347
 
348
  # ============================================
349
  # JSON LOGIC: PARSERS & SUBTITLE GENERATORS
 
354
  try:
355
  with open(json_path, 'r', encoding='utf-8') as f:
356
  data = json.load(f)
357
+
358
  title_words = []
359
  start_time = None
360
  end_time = 3.0
361
+
362
  for segment in data.get('segments', []):
363
  for word_data in segment.get('words', []):
364
  word_text = word_data.get('text', '').strip()
365
+ if not word_text:
366
+ continue
367
+
368
  if start_time is None:
369
  start_time = word_data.get('start_time', 0.0)
370
+
371
  title_words.append(word_text)
372
+
 
373
  if re.search(r'[.!?]$', word_text):
374
  end_time = word_data.get('end_time', 3.0)
375
  return " ".join(title_words), start_time, end_time
376
+
 
377
  if title_words:
378
  return " ".join(title_words), start_time, end_time
379
  return "No subtitle found", 0.0, 3.0
380
+ except Exception as e:
381
  print(f"Error extracting first subtitle: {e}")
382
  return "No subtitle found", 0.0, 3.0
383
 
 
 
 
384
  def find_title_and_cta(json_path, book_title):
385
  """Uses a sliding window to find the exact start and end millisecond of the book title."""
386
  try:
387
+ if not book_title or not book_title.strip():
388
+ return None, None
389
+
390
+ with open(json_path, 'r', encoding='utf-8') as f:
391
+ data = json.load(f)
392
+
393
  book_title_lower = book_title.lower()
394
  title_clean = re.sub(r'[^\w\s]', '', book_title_lower).strip()
395
  book_title_words = title_clean.split()
396
  window_size = len(book_title_words)
397
+
 
398
  all_words = []
399
  for segment in data.get('segments', []):
400
  for word_data in segment.get('words', []):
 
405
  'start': word_data.get('start_time', 0.0),
406
  'end': word_data.get('end_time', 0.0)
407
  })
408
+
409
  best_score = 0
410
  best_start = None
411
  best_end = None
412
+
 
413
  for w_size in [window_size, window_size + 1, window_size - 1]:
414
+ if w_size <= 0:
415
+ continue
416
  for i in range(len(all_words) - w_size + 1):
417
+ window_text = " ".join([w['text'] for w in all_words[i: i + w_size]]).lower()
418
  window_text_clean = re.sub(r'[^\w\s]', '', window_text).strip()
419
+
420
  score = fuzz.ratio(title_clean, window_text_clean)
421
  if score > best_score:
422
  best_score = score
423
  best_start = all_words[i]['start']
424
  best_end = all_words[i + w_size - 1]['end']
425
+
 
426
  if best_score >= 85:
427
  return best_start, best_end
428
+
429
  return None, None
430
  except Exception as e:
431
  print(f"Error finding title: {e}")
432
  return None, None
433
 
434
+ def create_body_ass_from_json(json_path, output_dir, highlight_color='yellow',
435
+ font_size=None, start_time_sec=0.0, config=SUBTITLE_CONFIG,
436
+ stop_time_sec=None):
437
  """Creates dynamic body subtitles starting at 1 word and increasing by 2 up to 50."""
438
+ if font_size is None:
439
+ font_size = config['font_size_default']
440
+ color_map = {
441
+ 'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
442
+ 'green': ('&H0000FF00', '&H00000000'), 'cyan': ('&H00FFFF00', '&H00000000'),
443
+ 'pink': ('&H00FF69B4', '&H00000000'), 'red': ('&H000000FF', '&H00FFFFFF'),
444
+ 'blue': ('&H00FF0000', '&H00FFFFFF')
445
+ }
446
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
447
 
448
  ass_path = os.path.join(output_dir, 'body_subtitles.ass')
449
  ass_header = f"""[Script Info]
450
+ Title: Body JSON Subtitles
451
+ ScriptType: v4.00+
452
+ [V4+ Styles]
453
+ Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
454
+ Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
455
+ [Events]
456
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
457
 
458
+ with open(json_path, 'r', encoding='utf-8') as f:
459
+ data = json.load(f)
460
 
461
  all_words = []
462
  for segment in data.get('segments', []):
463
  for word_data in segment.get('words', []):
464
  word_text = word_data.get('text', '').strip()
465
+ start_ms = word_data.get('start_time', 0)
466
+ if start_ms < start_time_sec - 0.1:
467
+ continue
468
+ if stop_time_sec is not None and start_ms >= stop_time_sec - 0.1:
469
+ continue
470
  if word_text:
471
  all_words.append({'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
472
 
 
474
  i = 0
475
  current_chunk_size = 1
476
  max_chunk_size = 50
477
+
478
  while i < len(all_words):
479
  remaining = len(all_words) - i
480
  take = min(current_chunk_size, remaining)
481
+ chunks.append(all_words[i: i + take])
482
  i += take
483
  if current_chunk_size < max_chunk_size:
484
  current_chunk_size = min(current_chunk_size + 4, max_chunk_size)
 
489
  frame_end = chunk[-1]['end']
490
  for idx, info in enumerate(chunk):
491
  w_start = info['start']
492
+ w_end = chunk[idx + 1]['start'] if idx + 1 < len(chunk) else frame_end
493
+
494
  text_parts = []
495
  for j, word_str in enumerate(chunk_text_only):
496
+ if j == idx:
497
+ text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{word_str}{{\\r}}")
498
+ else:
499
+ text_parts.append(word_str)
500
+ ass_events.append(
501
+ f"Dialogue: 0,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
502
+
503
+ with open(ass_path, 'w', encoding='utf-8') as f:
504
  f.write(ass_header + '\n'.join(ass_events))
505
  return ass_path
506
 
507
+ def create_cta_ass_from_json(json_path, output_dir, start_sec, font_size, video_width, video_height,
508
+ highlight_color='yellow', config=SUBTITLE_CONFIG, words_per_frame=10):
509
  """Creates the chunky, Instagram-style box subtitles for the CTA."""
510
  color_map = {
511
  'yellow': ('&H00000000', '&H0000FFFF'), 'orange': ('&H0000A5FF', '&H00000000'),
 
514
  'blue': ('&H00FF0000', '&H00FFFFFF')
515
  }
516
  highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))
517
+ margin_lr = int(video_width * 0.125) + 40
518
 
519
  ass_path = os.path.join(output_dir, 'cta_subtitles.ass')
 
520
  ass_header = f"""[Script Info]
521
+ Title: CTA JSON Subtitles
522
+ ScriptType: v4.00+
523
+ PlayResX: {video_width}
524
+ PlayResY: {video_height}
525
+ WrapStyle: 1
526
+ [V4+ Styles]
527
+ Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
528
+ Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,85,0,0,3,10,0,5,{margin_lr},{margin_lr},0,1
529
+ [Events]
530
+ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"""
531
+
532
+ with open(json_path, 'r', encoding='utf-8') as f:
533
+ data = json.load(f)
534
 
535
  all_cta_words = []
536
  for segment in data.get('segments', []):
537
  for word_data in segment.get('words', []):
538
  word_text = word_data.get('text', '').strip()
539
  start_ms = word_data.get('start_time', 0)
540
+ if start_ms < start_sec - 0.1:
541
+ continue
542
  if word_text:
543
+ if word_text.lower().startswith('access') and len(all_cta_words) > 0 and all_cta_words[-1][
544
+ 'word'].lower() == 'book':
 
545
  punctuation = word_text[6:]
546
  all_cta_words[-1]['word'] = 'BookXcess' + punctuation
 
547
  all_cta_words[-1]['end'] = word_data.get('end_time', 0)
548
+ continue
549
+ all_cta_words.append(
550
+ {'word': word_text, 'start': start_ms, 'end': word_data.get('end_time', 0)})
551
 
552
  chunks = []
553
  i = 0
554
  while i < len(all_cta_words):
555
  remaining = len(all_cta_words) - i
556
  take = remaining if words_per_frame < remaining <= words_per_frame + 2 else min(words_per_frame, remaining)
557
+ chunks.append(all_cta_words[i: i + take])
558
  i += take
559
 
560
  ass_events = []
 
563
  frame_end = chunk[-1]['end']
564
  for idx, info in enumerate(chunk):
565
  w_start = info['start']
566
+ w_end = chunk[idx + 1]['start'] if idx + 1 < len(chunk) else frame_end
567
+
568
  text_parts = []
569
  for j, word_str in enumerate(chunk_text_only):
570
+ if j == idx:
571
+ text_parts.append(f"{{\\c{highlight_text}}}{word_str}{{\\r}}")
572
+ else:
573
+ text_parts.append(word_str)
574
+ ass_events.append(
575
+ f"Dialogue: 1,{sec_to_ass_time(w_start)},{sec_to_ass_time(w_end)},Default,,0,0,0,,{' '.join(text_parts)}")
576
+
577
+ with open(ass_path, 'w', encoding='utf-8') as f:
578
  f.write(ass_header + '\n'.join(ass_events))
579
  return ass_path
580
 
 
582
  # MAIN STITCH FUNCTION
583
  # =========================
584
 
585
def stitch_media(video_file, video_url, audio_file, audio_url, subtitle_file, subtitle_url,
                 book_cover_file, book_cover_url, book_cover_base64, book_id, book_title,
                 enable_highlight, highlight_color, font_size, crf_quality=23):
    """End-to-end render pipeline.

    Speeds audio and subtitle timestamps up 1.3x, builds body/CTA ASS subtitle
    tracks, and assembles the final video (looped footage -> fade -> solid
    color -> book cover) with burned-in subtitles via ffmpeg.

    Returns (output_path, status_message) on success or (None, error_message).
    """
    temp_dir = tempfile.mkdtemp()  # NOTE(review): never removed here — confirm who cleans it up
    status_msg = "🚀 Starting video stitching...\n"
    try:
        # fontconfig env so ffmpeg's ASS renderer can see the bundled fonts.
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)
        video_path, v_err = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if v_err:
            return None, v_err
        audio_path, a_err = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if a_err:
            return None, a_err
        subtitle_path, s_err = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if s_err:
            return None, s_err

        # ✨ PRE-PROCESS SPEED HACK ✨
        speed_factor = 1.3

        # 1. Physically speed up the audio file
        fast_audio = os.path.join(temp_dir, f"fast_audio_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp3")
        subprocess.run(
            ["ffmpeg", "-v", "error", "-y", "-i", audio_path, "-filter:a", f"atempo={speed_factor}", fast_audio],
            check=True)
        audio_path = fast_audio

        # 2. Physically shrink the JSON timestamps
        # (divide every timestamp by the same factor so subs stay in sync)
        fast_json = os.path.join(temp_dir, f"fast_subs_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
        with open(subtitle_path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        for segment in json_data.get('segments', []):
            segment['start_time'] = segment.get('start_time', 0) / speed_factor
            segment['end_time'] = segment.get('end_time', 0) / speed_factor
            for word in segment.get('words', []):
                word['start_time'] = word.get('start_time', 0) / speed_factor
                word['end_time'] = word.get('end_time', 0) / speed_factor

        with open(fast_json, 'w', encoding='utf-8') as f:
            json.dump(json_data, f)
        subtitle_path = fast_json

        video_width, video_height, video_fps = get_video_info(video_path)
        audio_duration = get_audio_duration(audio_path)

        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit_template = os.path.exists(reddit_template_path)

        first_sub_start = 0
        first_sub_end = 0
        if has_reddit_template:
            try:
                first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
                status_msg += f"\n📱 Reddit Overlay: '{first_sub_text[:30]}...'\n"
                reddit_card_path = create_reddit_card_with_text(reddit_template_path, first_sub_text, temp_dir,
                                                                REDDIT_CONFIG)
            except Exception as e:
                # Overlay is optional — degrade gracefully and keep rendering.
                status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
                has_reddit_template = False

        # --- 1. Find Title Exact Word Timings ---
        title_start, title_end = find_title_and_cta(subtitle_path, book_title)

        # Fall back to a fixed late-video position when the title wasn't matched.
        book_appears_at = title_start if title_start is not None else audio_duration * (
                1 - VIDEO_CONFIG['promo_percent'])
        box_appears_at = title_end if title_end is not None else book_appears_at + 1.5

        if title_start is not None:
            status_msg += f"\n📖 Hard cut to Book Cover at {title_start:.2f}s\n"
            status_msg += f"🤫 Book title silenced in subtitles.\n"
            status_msg += f"🖤 CTA text starts exactly at {title_end:.2f}s\n"

        # --- 2. Prepare Dynamic CTA Text (JSON) ---
        status_msg += "🖤 Generating Instagram-style dynamic CTA...\n"
        cta_font_size = int(video_width * 0.060)

        cta_ass_path = create_cta_ass_from_json(
            subtitle_path, temp_dir, box_appears_at,
            cta_font_size, video_width, video_height, highlight_color
        )
        # ffmpeg filter args need ':' escaped and forward slashes.
        cta_sub_escaped = cta_ass_path.replace('\\', '/').replace(':', '\\:')

        # --- 3. Process Main Subtitles (JSON) ---
        if enable_highlight:
            status_msg += f"\n✨ Processing JSON subtitles...\n"
            body_start_time = first_sub_end if has_reddit_template else 0.0
            main_subtitle_path = create_body_ass_from_json(
                subtitle_path, temp_dir, highlight_color, font_size,
                start_time_sec=body_start_time, config=SUBTITLE_CONFIG,
                stop_time_sec=book_appears_at
            )
        else:
            main_subtitle_path = subtitle_path

        main_sub_escaped = main_subtitle_path.replace('\\', '/').replace(':', '\\:')

        book_cover_path, book_error = validate_book_cover_input(book_cover_file, book_cover_url, book_cover_base64,
                                                                book_id, temp_dir)
        if book_error:
            return None, book_error
        has_book_cover = book_cover_path is not None
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")

        # NOTE(review): when no book cover is resolved, nothing below renders
        # output_path, so the function falls through to "Output not created".
        # Confirm whether a cover-less render path was intended.
        if has_book_cover:
            try:
                fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
                fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']

                # Never let the fade overrun the book-cover cut point.
                if fade_ends_at > book_appears_at:
                    fade_ends_at = book_appears_at
                    fade_starts_at = min(fade_starts_at, fade_ends_at - 1.0)

                fade_out_duration = fade_ends_at - fade_starts_at
                solid_color_duration = max(0, book_appears_at - fade_ends_at)

                main_video_duration = fade_ends_at
                cover_segment_duration = audio_duration - book_appears_at
                fade_color_hex = "#dacfc3"

                # 1. Main Segment
                main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
                cmd_main = ["ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration), "-vf",
                            f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}",
                            "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y",
                            main_segment_path]
                subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # 2. Solid Color Segment
                solid_color_path = None
                if solid_color_duration > 0:
                    solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
                    cmd_solid = ["ffmpeg", "-f", "lavfi", "-i",
                                 f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}",
                                 "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y",
                                 solid_color_path]
                    subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # 3. Book Cover Segment
                cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
                cmd_cover = ["ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration), "-vf",
                             f"scale={video_width}:{video_height},setsar=1,fps={video_fps}", "-c:v", "libx264",
                             "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path]
                subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)

                # 4. Stitch them together
                concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
                with open(concat_list_path, 'w') as f:
                    f.write(f"file '{main_segment_path}'\n")
                    if solid_color_path:
                        f.write(f"file '{solid_color_path}'\n")
                    f.write(f"file '{cover_segment_path}'\n")

                # 5. Build the Filter Graph
                input_cmd = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path]
                curr_idx = 1
                curr_stream = "[0:v]"

                if has_reddit_template:
                    input_cmd += ["-loop", "1", "-i", reddit_card_path]
                    filter_complex = f"[{curr_idx}:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];{curr_stream}[reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v1];"
                    curr_stream, curr_idx = "[v1]", curr_idx + 1
                else:
                    filter_complex = f"{curr_stream}copy[v1];"
                    curr_stream = "[v1]"

                # Burn in Main Subtitles
                filter_complex += f"{curr_stream}ass={main_sub_escaped}[v2];"
                curr_stream = "[v2]"

                # Burn in CTA Subtitles
                if cta_ass_path:
                    filter_complex += f"{curr_stream}ass={cta_sub_escaped}[v_final]"
                else:
                    filter_complex += f"{curr_stream}copy[v_final]"

                input_cmd += ["-i", audio_path]

                cmd_final = input_cmd + [
                    "-filter_complex", filter_complex,
                    "-map", "[v_final]", "-map", f"{curr_idx}:a",
                    "-c:v", "libx264", "-crf", str(crf_quality),
                    "-c:a", "aac", "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                ]

                status_msg += "🎬 Rendering final synchronized video...\n"
                subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)
            except Exception as e:
                return None, f"❌ Book cover processing error: {str(e)}"

        if os.path.exists(output_path):
            return output_path, f"✅ Success!"
        else:
            return None, "❌ Output not created"
    except Exception as e:
        return None, f"❌ Error: {str(e)}"
783
+
784
+ # =========================
785
+ # FASTAPI APP
786
+ # =========================
787
+
788
  app = FastAPI(title="Video Stitcher API")
789
 
790
  app.add_middleware(
 
807
  f.write(upload_file.file.read())
808
  return dest_path
809
 
810
@app.on_event("startup")
def verify_fonts_on_startup():
    """Runs once when the server boots. Resolves and caches all font paths.

    Logs the runtime layout (script dir vs CWD differ across deployments),
    then resolves each configured font and prints a summary.
    """
    print("\n" + "=" * 60)
    print("🚀 SERVER STARTUP — FONT VERIFICATION")
    print("=" * 60)

    script_dir = os.path.dirname(os.path.abspath(__file__))
    cwd = os.getcwd()
    print(f"📂 __file__: {os.path.abspath(__file__)}")
    print(f"📂 Script dir: {script_dir}")
    print(f"📂 CWD: {cwd}")
    print(f"📂 CWD contents: {os.listdir(cwd)}")

    fonts_dir = os.path.join(script_dir, 'fonts')
    if os.path.exists(fonts_dir):
        print(f"📂 fonts/ contents: {os.listdir(fonts_dir)}")
    else:
        print(f"❌ fonts/ directory NOT FOUND at {fonts_dir}")

    # Also check CWD fonts
    cwd_fonts = os.path.join(cwd, 'fonts')
    if os.path.exists(cwd_fonts) and cwd_fonts != fonts_dir:
        print(f"📂 CWD fonts/ contents: {os.listdir(cwd_fonts)}")

    # Resolve each font
    resolve_font_at_startup(REDDIT_CONFIG['font_file'], REDDIT_CONFIG)
    resolve_font_at_startup(SUBTITLE_CONFIG['font_file'], SUBTITLE_CONFIG)

    # Final summary. resolve_font_at_startup always SETS the key (possibly to
    # None), so dict.get's default never fires — use `or` so an unresolved
    # font prints the marker instead of the literal "None".
    print("\n" + "=" * 60)
    print("📋 FONT RESOLUTION SUMMARY")
    print(f" Reddit font: {REDDIT_CONFIG.get('_resolved_font_path') or '❌ NOT FOUND'}")
    print(f" Subtitle font: {SUBTITLE_CONFIG.get('_resolved_font_path') or '❌ NOT FOUND'}")
    print("=" * 60 + "\n")
+
846
+ @app.post('/video_stitch',
847
+ responses={400: {"model": StitchErrorResponse}, 500: {"model": StitchErrorResponse}})
848
  async def stitch_upload(
849
+ request: Request,
850
+ video_file: Optional[UploadFile] = File(None),
851
+ video_url: Optional[str] = Form(None),
852
+ audio_file: Optional[UploadFile] = File(None),
853
+ audio_url: Optional[str] = Form(None),
854
+ subtitle_file: Optional[UploadFile] = File(None),
855
+ subtitle_url: Optional[str] = Form(None),
856
+ book_cover_file: Optional[UploadFile] = File(None),
857
+ book_cover_url: Optional[str] = Form(None),
858
+ book_cover_base64: Optional[str] = Form(None),
859
+ book_id: Optional[str] = Form(None),
860
+ book_title: Optional[str] = Form(None),
861
+ enable_highlight: bool = Form(True),
862
+ highlight_color: str = Form('yellow'),
863
+ font_size: int = Form(10),
864
+ crf_quality: int = Form(23),
865
+ ):
866
  # Format validation
867
  if subtitle_file and not subtitle_file.filename.endswith('.json'):
868
  raise HTTPException(status_code=422, detail="❌ Subtitle must be a .json file")
869
  if subtitle_url and not subtitle_url.strip().split('?')[0].endswith('.json'):
870
  raise HTTPException(status_code=422, detail="❌ Subtitle URL must point to a .json file")
871
+ if audio_file and audio_file.content_type not in {"audio/mpeg", "audio/mp3", "audio/wav", "audio/x-wav",
872
+ "audio/aac", "audio/mp4", "audio/x-m4a"}:
873
  raise HTTPException(status_code=422, detail=f"❌ Invalid audio format: {audio_file.content_type}")
874
  if book_cover_file and book_cover_file.content_type not in {"image/jpeg", "image/png", "image/webp"}:
875
  raise HTTPException(status_code=422, detail="❌ Book cover must be jpeg, png, or webp")
 
941
 
942
@app.get('/health')
async def health():
    """Liveness probe: reports service status and the cached font paths."""
    payload = {"status": "ok"}
    # Font paths are cached on the config dicts at startup; surface them
    # here so a health check also confirms font resolution succeeded.
    payload["reddit_font"] = REDDIT_CONFIG.get('_resolved_font_path', 'NOT RESOLVED')
    payload["subtitle_font"] = SUBTITLE_CONFIG.get('_resolved_font_path', 'NOT RESOLVED')
    return payload