Nav3005 committed on
Commit
23b54a0
·
verified ·
1 Parent(s): 3a97dde

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +28 -0
  2. app.py +873 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11-slim

# System libraries needed by Pillow (libgl/glib), OpenMP consumers, and
# fontconfig for libass font resolution.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libgomp1 \
    fontconfig \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies first (layer caching across code-only edits)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# BUG FIX: COPY is a builder instruction, not a shell command — the previous
# "COPY app.py . 2>/dev/null || true" passed the redirection to the builder
# verbatim and failed the build. Copy the whole build context instead; the
# optional files (reddit_template.png, fonts/) are included when present.
COPY . .

# Hugging Face Spaces requires port 7860
EXPOSE 7860

# Run with uvicorn on the required HF port
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
app.py ADDED
@@ -0,0 +1,873 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import static_ffmpeg
3
+ import os
4
+ import tempfile
5
+ import requests
6
+ import re
7
+ import textwrap
8
+ import shutil
9
+ import time
10
+ import uuid
11
+ import asyncio
12
+ from datetime import datetime
13
+ from PIL import Image, ImageDraw, ImageFont
14
+ from functools import lru_cache
15
+ from typing import Optional
16
+ from concurrent.futures import ThreadPoolExecutor
17
+
18
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException, BackgroundTasks, Request
19
+ from fastapi.responses import FileResponse, JSONResponse
20
+ from fastapi.middleware.cors import CORSMiddleware
21
+ from pydantic import BaseModel, Field
22
+ import aiofiles
23
+
24
+
25
# ========================================
# CONFIGURATION SECTION - CUSTOMIZE HERE
# ========================================

# Reddit Template Text Settings
REDDIT_CONFIG = {
    'template_file': 'reddit_template.png',  # Template filename in script directory
    'font_file': 'RFDewi-Bold.ttf',          # Font file for Reddit text
    'font_size_max': 180,                    # Maximum font size to try
    'font_size_min': 16,                     # Minimum font size (if text too long)
    'text_wrap_width': 35,                   # Characters per line for wrapping
    'text_color': 'black',                   # Text color
    'line_spacing': 10,                      # Spacing between lines
    'text_box_width_percent': 0.85,          # 85% of template width
    'text_box_height_percent': 0.65,         # 65% of template height
    'y_offset': 20,                          # Vertical offset from center
}

# Word-by-Word Subtitle Settings
SUBTITLE_CONFIG = {
    'font_file': 'TitanOne-Regular.ttf',     # Font file for subtitles (TTF or OTF)
    'font_name': 'Titan One',                # Font name as it appears in system
    'font_size_default': 12,                 # Default subtitle font size
    'position_alignment': 5,                 # 5 = center (1-9 numpad layout)
    'margin_left': 50,
    'margin_right': 70,
    'margin_vertical': 0,
}

# Video Processing Settings
VIDEO_CONFIG = {
    'reddit_scale_percent': 0.75,            # Reddit template size (0.75 = 75% of video width)
    'fade_start_percent': 0.70,              # When fade to color starts (70% of audio duration)
    'fade_end_percent': 0.83,                # When fully faded to color (83%)
    'promo_percent': 0.1,                    # Last 10% for book cover
    'fade_color_rgb': (218, 207, 195),       # Fade color RGB
    'book_fade_in_duration': 2,              # Book cover fade-in duration (seconds)
}

# ========================================
# END CONFIGURATION SECTION
# ========================================
67
+
68
# Add the static_ffmpeg-bundled ffmpeg/ffprobe binaries to PATH so the
# subprocess calls below resolve without a system-wide ffmpeg install.
static_ffmpeg.add_paths()

# Thread pool for background jobs (max 2 concurrent encoding jobs)
executor = ThreadPoolExecutor(max_workers=2)
73
+
74
def setup_custom_fonts_hf(temp_dir):
    """
    Setup custom fonts for FFmpeg/libass - Hugging Face Spaces compatible.

    Collects .ttf/.otf files from the repo's fonts/ directory plus the two
    configured font files, stages them under <temp_dir>/fonts, and writes a
    minimal fonts.conf pointing fontconfig at that directory.  Returns a
    copy of os.environ with FONTCONFIG_FILE/FONTCONFIG_PATH set; when no
    fonts are found — or on any failure — falls back to a plain env copy.
    """
    try:
        staging_dir = os.path.join(temp_dir, 'fonts')
        os.makedirs(staging_dir, exist_ok=True)

        base_dir = os.path.dirname(os.path.abspath(__file__))
        repo_fonts = os.path.join(base_dir, 'fonts')

        candidates = []
        if os.path.exists(repo_fonts):
            candidates = [
                os.path.join(repo_fonts, name)
                for name in os.listdir(repo_fonts)
                if name.endswith(('.ttf', '.otf', '.TTF', '.OTF'))
            ]

        for configured in (REDDIT_CONFIG['font_file'], SUBTITLE_CONFIG['font_file']):
            candidate = os.path.join(base_dir, configured)
            if os.path.exists(candidate) and candidate not in candidates:
                candidates.append(candidate)

        for source in candidates:
            shutil.copy(source, os.path.join(staging_dir, os.path.basename(source)))

        if not candidates:
            return os.environ.copy()

        staged = staging_dir
        conf_text = f"""<?xml version="1.0"?>
<fontconfig>
<dir>{staged}</dir>
<cachedir>{temp_dir}/cache</cachedir>
</fontconfig>
"""
        conf_path = os.path.join(temp_dir, 'fonts.conf')
        with open(conf_path, 'w') as handle:
            handle.write(conf_text)

        env = os.environ.copy()
        env['FONTCONFIG_FILE'] = conf_path
        env['FONTCONFIG_PATH'] = temp_dir
        return env

    except Exception:
        # Best-effort: font setup must never break the encode; fall back.
        return os.environ.copy()
121
+
122
def download_file_from_url(url, output_dir, filename):
    """Download a file from URL and save it to output directory.

    Streams the response body to disk in 8 KiB chunks so large media files
    are never held fully in memory.  Returns the saved path; any failure is
    re-raised as a generic Exception with context.
    """
    try:
        destination = os.path.join(output_dir, filename)
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()
        with open(destination, 'wb') as sink:
            for chunk in response.iter_content(chunk_size=8192):
                sink.write(chunk)
        return destination
    except Exception as e:
        raise Exception(f"Failed to download file from URL: {str(e)}")
136
+
137
def download_book_cover(book_id, output_dir):
    """Download book cover from Google Books API using Book ID.

    Fetches the front-cover image for ``book_id``, saves it as
    book_cover.png in ``output_dir``, and verifies the payload is a
    readable image before returning the saved path.

    Raises:
        Exception: if the download fails or the bytes are not a valid image.
    """
    try:
        image_url = f"https://books.google.com/books/publisher/content/images/frontcover/{book_id}"

        response = requests.get(image_url, timeout=30)
        response.raise_for_status()

        image_path = os.path.join(output_dir, 'book_cover.png')
        with open(image_path, 'wb') as f:
            f.write(response.content)

        # BUG FIX: the original called Image.open(...).verify() without
        # closing the handle, leaking the open file; the context manager
        # releases it after verification.
        with Image.open(image_path) as img:
            img.verify()

        return image_path
    except Exception as e:
        raise Exception(f"Failed to download book cover: {str(e)}")
155
+
156
def get_video_info(video_path):
    """Get video resolution and frame rate using ffprobe.

    Returns:
        (width, height, fps) as (int, int, float).

    Raises:
        Exception: on any ffprobe failure or unparseable output.
    """
    def _probe(entries, output_format):
        # One ffprobe query against the first video stream; returns stdout.
        command = [
            "ffprobe", "-v", "error", "-select_streams", "v:0",
            "-show_entries", entries, "-of", output_format, video_path,
        ]
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
        return completed.stdout.strip()

    try:
        width, height = _probe("stream=width,height", "csv=s=x:p=0").split('x')

        rate = _probe("stream=r_frame_rate", "default=noprint_wrappers=1:nokey=1")
        # r_frame_rate is usually a rational like "30000/1001".
        if '/' in rate:
            numerator, denominator = rate.split('/')
            fps = float(numerator) / float(denominator)
        else:
            fps = float(rate)

        return int(width), int(height), fps
    except Exception as e:
        raise Exception(f"Failed to get video info: {str(e)}")
182
+
183
def get_audio_duration(audio_path):
    """Get audio duration in seconds using ffprobe.

    Raises:
        Exception: on ffprobe failure or non-numeric output.
    """
    try:
        completed = subprocess.run(
            [
                "ffprobe", "-v", "error", "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1", audio_path,
            ],
            capture_output=True, text=True, check=True,
        )
        return float(completed.stdout.strip())
    except Exception as e:
        raise Exception(f"Failed to get audio duration: {str(e)}")
194
+
195
def extract_first_subtitle(srt_path):
    """Extract first subtitle entry. Returns: (text, start_sec, end_sec)

    Falls back to ("No subtitle found", 0.0, 3.0) when the file contains no
    parseable cue.  Raises Exception on I/O or parse errors.
    """
    def _timestamp_to_seconds(stamp):
        # "HH:MM:SS,mmm" -> float seconds.
        hours, minutes, rest = stamp.split(':')
        whole, millis = rest.split(',')
        return int(hours) * 3600 + int(minutes) * 60 + int(whole) + int(millis) / 1000.0

    try:
        with open(srt_path, 'r', encoding='utf-8') as handle:
            raw = handle.read()

        # Cues are separated by blank lines.
        cues = re.split(r'\n\s*\n', raw.strip())
        if cues:
            lines = cues[0].strip().split('\n')
            if len(lines) >= 3:
                parts = lines[1].split(' --> ')
                return (
                    ' '.join(lines[2:]).strip(),
                    _timestamp_to_seconds(parts[0].strip()),
                    _timestamp_to_seconds(parts[1].strip()),
                )

        return "No subtitle found", 0.0, 3.0
    except Exception as e:
        raise Exception(f"Failed to extract first subtitle: {str(e)}")
223
+
224
def create_reddit_card_with_text(template_path, hook_text, output_dir, config=REDDIT_CONFIG):
    """Create Reddit card with text using PIL.

    Renders ``hook_text`` centered on the template image, shrinking the font
    (from config['font_size_max'] down to config['font_size_min'], step 2)
    until the wrapped text fits inside the configured text box.  Saves the
    result as reddit_card_composite.png in ``output_dir``.

    Returns:
        Path to the saved composite PNG.

    Raises:
        Exception: wrapping any PIL/font/file error with context.
    """
    def _load_font(size):
        # Try the bundled fonts first, then a system Verdana, then PIL's
        # built-in default (which ignores `size` but never fails).
        script_dir = os.path.dirname(os.path.abspath(__file__))
        search_paths = [
            os.path.join(script_dir, 'fonts', config['font_file']),
            os.path.join(script_dir, config['font_file']),
        ]
        for path in search_paths:
            if os.path.exists(path):
                try:
                    return ImageFont.truetype(path, size)
                except Exception:
                    pass
        try:
            return ImageFont.truetype('Verdana', size)
        except Exception:
            return ImageFont.load_default()

    try:
        template = Image.open(template_path).convert('RGBA')
        template_width, template_height = template.size
        draw = ImageDraw.Draw(template)

        text_box_width = int(template_width * config['text_box_width_percent'])
        text_box_height = int(template_height * config['text_box_height_percent'])

        # Wrapping does not depend on font size, so compute it once.
        wrapped_text = textwrap.fill(hook_text, width=config['text_wrap_width'])

        # Find the largest font size whose wrapped text fits the text box.
        # BUG FIX: the original fell back to the UNWRAPPED hook text at the
        # MAXIMUM size when no size fit; now the wrapped text is rendered at
        # font_size_min instead.
        chosen_size = config['font_size_min']
        for font_size in range(config['font_size_max'], config['font_size_min'] - 1, -2):
            font = _load_font(font_size)
            bbox = draw.multiline_textbbox((0, 0), wrapped_text, font=font,
                                           spacing=config['line_spacing'])
            if (bbox[2] - bbox[0]) <= text_box_width and (bbox[3] - bbox[1]) <= text_box_height:
                chosen_size = font_size
                break

        font = _load_font(chosen_size)
        bbox = draw.multiline_textbbox((0, 0), wrapped_text, font=font,
                                       spacing=config['line_spacing'])
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]

        # Center horizontally and vertically, then nudge by the configured offset.
        x = (template_width - text_width) / 2
        y = (template_height - text_height) / 2 + config['y_offset']

        draw.multiline_text(
            (x, y),
            wrapped_text,
            fill=config['text_color'],
            font=font,
            spacing=config['line_spacing'],
            align='left'
        )

        output_path = os.path.join(output_dir, 'reddit_card_composite.png')
        template.save(output_path, 'PNG')

        return output_path
    except Exception as e:
        raise Exception(f"Failed to create Reddit card: {str(e)}")
307
+
308
def validate_and_get_file(uploaded_file, url_string, file_type, temp_dir):
    """Validate that only one input method is used and return the file path.

    Returns:
        (path, error) — exactly one of the two is None.
    """
    upload_given = uploaded_file is not None
    url_given = bool(url_string and url_string.strip())

    if not upload_given and not url_given:
        return None, f"❌ Please provide {file_type} either by upload or URL"

    if upload_given and url_given:
        return None, f"❌ Please use only ONE method for {file_type}: either upload OR URL (not both)"

    if upload_given:
        # File-like objects expose .name; plain path strings pass through.
        return getattr(uploaded_file, 'name', uploaded_file), None

    try:
        clean_url = url_string.strip()
        leaf = clean_url.split('/')[-1]

        if '.' not in leaf:
            # No extension in the URL tail: pick one from the file type.
            leaf += {'video': '.mp4', 'audio': '.wav', 'subtitle': '.srt'}.get(file_type, '.tmp')

        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        saved = download_file_from_url(clean_url, temp_dir, f"{file_type}_{stamp}_{leaf}")
        return saved, None
    except Exception as e:
        return None, f"❌ Error downloading {file_type} from URL: {str(e)}"
341
+
342
def srt_time_to_ms(time_str):
    """Convert an SRT timestamp ("HH:MM:SS,mmm") to integer milliseconds."""
    clock, _, millis = time_str.strip().partition(',')
    h, m, s = (int(part) for part in clock.split(':'))
    return ((h * 60 + m) * 60 + s) * 1000 + int(millis)
349
+
350
def ms_to_ass_time(ms):
    """Convert milliseconds to ASS timestamp format (H:MM:SS.cc)."""
    total_seconds, remainder_ms = divmod(ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    centiseconds = remainder_ms // 10
    return f"{hours}:{minutes:02d}:{seconds:02d}.{centiseconds:02d}"
359
+
360
def create_word_by_word_highlight_ass(srt_path, output_dir, highlight_color='yellow',
                                      font_size=None, skip_first=False, config=SUBTITLE_CONFIG):
    """Convert SRT to ASS with word-by-word highlighting.

    Each SRT cue becomes one ASS Dialogue event per word: the cue duration is
    divided evenly across its words and the active word is restyled inline
    (colour + thick outline).  Returns the path to the generated .ass file.

    Args:
        srt_path: input .srt file (UTF-8).
        output_dir: directory receiving word_highlight_subtitles.ass.
        highlight_color: key into the colour map; unknown names fall back
            to the yellow scheme.
        font_size: subtitle size; defaults to config['font_size_default'].
        skip_first: drop the first cue (it is shown as the Reddit card).
        config: style settings (font name, alignment, margins).
    """
    if font_size is None:
        font_size = config['font_size_default']

    # Pairs in ASS &HAABBGGRR (blue-green-red) notation, applied below as
    # (\3c outline, \c fill).  NOTE(review): the pair ordering is not uniform
    # across entries (e.g. 'yellow' = black outline / yellow text, 'orange' =
    # orange outline / black text) — presumably intentional styling; confirm.
    color_map = {
        'yellow': ('&H00000000', '&H0000FFFF'),
        'orange': ('&H0000A5FF', '&H00000000'),
        'green': ('&H0000FF00', '&H00000000'),
        'cyan': ('&H00FFFF00', '&H00000000'),
        'pink': ('&H00FF69B4', '&H00000000'),
        'red': ('&H000000FF', '&H00FFFFFF'),
        'blue': ('&H00FF0000', '&H00FFFFFF'),
    }

    highlight_bg, highlight_text = color_map.get(highlight_color.lower(), ('&H00000000', '&H0000FFFF'))

    with open(srt_path, 'r', encoding='utf-8') as f:
        srt_content = f.read()

    ass_path = os.path.join(output_dir, 'word_highlight_subtitles.ass')

    ass_header = f"""[Script Info]
Title: Word-by-Word Highlight Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayDepth: 0
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{config['font_name']},{font_size},&H00FFFFFF,&H00FFFFFF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,3,1,{config['position_alignment']},{config['margin_left']},{config['margin_right']},{config['margin_vertical']},1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    # SRT cues are separated by blank lines.
    srt_blocks = re.split(r'\n\s*\n', srt_content.strip())
    ass_events = []
    start_index = 1 if skip_first else 0

    for block in srt_blocks[start_index:]:
        lines = block.strip().split('\n')
        if len(lines) >= 3:
            timestamp_line = lines[1]
            times = timestamp_line.split(' --> ')
            if len(times) == 2:
                start_ms = srt_time_to_ms(times[0])
                end_ms = srt_time_to_ms(times[1])

                text = ' '.join(lines[2:])
                words = text.split()

                if not words:
                    continue

                # Spread the cue's duration evenly across its words.
                total_duration = end_ms - start_ms
                time_per_word = total_duration / len(words)

                for i, word in enumerate(words):
                    word_start_ms = start_ms + int(i * time_per_word)
                    word_end_ms = start_ms + int((i + 1) * time_per_word)

                    # Snap the last word to the cue end to avoid rounding gaps.
                    if i == len(words) - 1:
                        word_end_ms = end_ms

                    # Restyle only the active word; {\r} resets to Default style.
                    text_parts = []
                    for j, w in enumerate(words):
                        if j == i:
                            text_parts.append(f"{{\\c{highlight_text}\\3c{highlight_bg}\\bord5}}{w}{{\\r}}")
                        else:
                            text_parts.append(w)

                    styled_text = ' '.join(text_parts)
                    start_time = ms_to_ass_time(word_start_ms)
                    end_time = ms_to_ass_time(word_end_ms)

                    ass_line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{styled_text}"
                    ass_events.append(ass_line)

    with open(ass_path, 'w', encoding='utf-8') as f:
        f.write(ass_header)
        f.write('\n'.join(ass_events))

    return ass_path
443
+
444
def stitch_media(
    video_file, video_url,
    audio_file, audio_url,
    subtitle_file, subtitle_url,
    book_id,
    enable_highlight,
    highlight_color,
    font_size,
    crf_quality=23
):
    """Main video stitching function with Reddit overlay and book cover.

    Pipeline:
      1. Resolve each input (upload path or URL download) into a fresh temp dir.
      2. Optionally render the first subtitle onto the Reddit template and
         overlay it during the first cue's time window.
      3. Optionally convert the SRT to word-by-word highlighted ASS.
      4. With a Google Books ID: encode a main segment that fades to a solid
         colour, a solid-colour hold, and a book-cover fade-in segment, then
         concat + burn subtitles + mux audio.  Without one: single-pass encode.

    Returns:
        (output_path, status_message) on success, (None, error_message) on
        failure.  The output file lives inside the temp dir (not cleaned up
        here — the caller serves/downloads it).
    """
    temp_dir = tempfile.mkdtemp()

    try:
        # Point fontconfig at bundled fonts so libass can resolve them.
        ffmpeg_env = setup_custom_fonts_hf(temp_dir)

        video_path, video_error = validate_and_get_file(video_file, video_url, 'video', temp_dir)
        if video_error: return None, video_error

        audio_path, audio_error = validate_and_get_file(audio_file, audio_url, 'audio', temp_dir)
        if audio_error: return None, audio_error

        subtitle_path, subtitle_error = validate_and_get_file(subtitle_file, subtitle_url, 'subtitle', temp_dir)
        if subtitle_error: return None, subtitle_error

        video_width, video_height, video_fps = get_video_info(video_path)
        # Audio drives the final length (encodes use -shortest).
        audio_duration = get_audio_duration(audio_path)

        status_msg = "📥 Processing files:\n"
        status_msg += f" • Video: {'URL' if video_url else 'Upload'} ({video_width}x{video_height} @ {video_fps:.2f}fps)\n"
        status_msg += f" • Audio: {'URL' if audio_url else 'Upload'} ({audio_duration:.2f}s)\n"
        status_msg += f" • Subtitle: {'URL' if subtitle_url else 'Upload'}\n"

        script_dir = os.path.dirname(os.path.abspath(__file__))
        reddit_template_path = os.path.join(script_dir, REDDIT_CONFIG['template_file'])
        has_reddit_template = os.path.exists(reddit_template_path)

        if has_reddit_template:
            status_msg += " • Reddit template: ✅ Found\n"
            try:
                first_sub_text, first_sub_start, first_sub_end = extract_first_subtitle(subtitle_path)
                status_msg += "\n📱 Reddit Overlay:\n"
                status_msg += f" • Text: '{first_sub_text[:40]}...'\n"
                status_msg += f" • Timing: {first_sub_start:.1f}s - {first_sub_end:.1f}s\n"

                reddit_card_path = create_reddit_card_with_text(
                    reddit_template_path, first_sub_text, temp_dir, REDDIT_CONFIG
                )
                status_msg += " • ✅ Reddit card ready\n"
            except Exception as e:
                # Card rendering is best-effort; fall back to plain subtitles.
                status_msg += f" • ⚠️ Reddit card failed: {str(e)}\n"
                has_reddit_template = False
        else:
            status_msg += " • Reddit template: ⚠️ Not found (skipping)\n"

        if enable_highlight:
            status_msg += f"\n✨ Word highlighting: {highlight_color} ({font_size}px)\n"
            # Skip the first cue when it is shown as the Reddit card instead.
            subtitle_to_use = create_word_by_word_highlight_ass(
                subtitle_path, temp_dir, highlight_color, font_size,
                skip_first=has_reddit_template, config=SUBTITLE_CONFIG
            )
        else:
            subtitle_to_use = subtitle_path

        # Escape for use inside an ffmpeg filter argument.
        subtitle_escaped = subtitle_to_use.replace('\\', '/').replace(':', '\\:')

        has_book_cover = book_id and book_id.strip()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(temp_dir, f"final_{timestamp}.mp4")

        r, g, b = VIDEO_CONFIG['fade_color_rgb']
        # BUG FIX: was the hard-coded literal "#dacfc3", silently ignoring
        # VIDEO_CONFIG['fade_color_rgb']; derive the hex from the config
        # (identical output for the default (218, 207, 195)).
        fade_color_hex = f"#{r:02x}{g:02x}{b:02x}"

        if has_book_cover:
            status_msg += f"\n📚 Downloading book cover (ID: {book_id})...\n"
            try:
                book_cover_path = download_book_cover(book_id.strip(), temp_dir)
                status_msg += "✅ Book cover downloaded\n"

                # Timeline (fractions of the audio duration):
                #   0 → fade_start : looping background video
                #   fade_start → fade_end : fade to the solid colour
                #   fade_end → (1 - promo) : solid colour hold
                #   (1 - promo) → end : book cover fades in and holds
                fade_starts_at = audio_duration * VIDEO_CONFIG['fade_start_percent']
                fade_ends_at = audio_duration * VIDEO_CONFIG['fade_end_percent']
                fade_out_duration = fade_ends_at - fade_starts_at

                promo_duration = audio_duration * VIDEO_CONFIG['promo_percent']
                book_appears_at = audio_duration - promo_duration
                solid_color_duration = book_appears_at - fade_ends_at

                main_video_duration = fade_ends_at
                cover_segment_duration = promo_duration

                status_msg += f"\n⏱️ Timing: Fade {fade_starts_at:.1f}→{fade_ends_at:.1f}s, Hold {solid_color_duration:.1f}s\n"

                status_msg += "🎬 Step 1/4: Main video with fade-out...\n"
                main_segment_path = os.path.join(temp_dir, f"main_{timestamp}.mp4")
                cmd_main = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path, "-t", str(main_video_duration),
                    "-vf", f"fps={video_fps},scale={video_width}:{video_height},fade=t=out:st={fade_starts_at}:d={fade_out_duration}:c={fade_color_hex}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", main_segment_path
                ]
                subprocess.run(cmd_main, check=True, capture_output=True, text=True, env=ffmpeg_env)

                status_msg += "✅ Step 1 done\n🎬 Step 2/4: Solid color...\n"
                solid_color_path = os.path.join(temp_dir, f"solid_{timestamp}.mp4")
                cmd_solid = [
                    "ffmpeg", "-f", "lavfi",
                    "-i", f"color=c={fade_color_hex}:s={video_width}x{video_height}:d={solid_color_duration}:r={video_fps}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-y", solid_color_path
                ]
                subprocess.run(cmd_solid, check=True, capture_output=True, text=True, env=ffmpeg_env)

                status_msg += "✅ Step 2 done\n🎬 Step 3/4: Cover with fade-in...\n"
                cover_segment_path = os.path.join(temp_dir, f"cover_{timestamp}.mp4")
                cmd_cover = [
                    "ffmpeg", "-loop", "1", "-i", book_cover_path, "-t", str(cover_segment_duration),
                    "-vf", f"scale={video_width}:{video_height},setsar=1,fps={video_fps},fade=t=in:st=0:d={VIDEO_CONFIG['book_fade_in_duration']}:c={fade_color_hex}",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-pix_fmt", "yuv420p", "-an", "-y", cover_segment_path
                ]
                subprocess.run(cmd_cover, check=True, capture_output=True, text=True, env=ffmpeg_env)

                status_msg += "✅ Step 3 done\n🎬 Step 4/4: Final assembly...\n"
                concat_list_path = os.path.join(temp_dir, f"concat_{timestamp}.txt")
                with open(concat_list_path, 'w') as f:
                    f.write(f"file '{main_segment_path}'\n")
                    f.write(f"file '{solid_color_path}'\n")
                    f.write(f"file '{cover_segment_path}'\n")

                if has_reddit_template:
                    # Burn subtitles first, then overlay the Reddit card
                    # (scaled to a fraction of the video width) only during
                    # the first cue's time window.
                    filter_complex = (
                        f"[0:v]ass={subtitle_escaped}[bg];"
                        f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                        f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v]"
                    )
                    cmd_final = [
                        "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path,
                        "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                        "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                        "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                        "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                    ]
                else:
                    cmd_final = [
                        "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat_list_path, "-i", audio_path,
                        "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                        "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                        "-pix_fmt", "yuv420p", "-shortest", "-y", output_path
                    ]

                subprocess.run(cmd_final, check=True, capture_output=True, text=True, env=ffmpeg_env)

            except subprocess.CalledProcessError as e:
                return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"
            except Exception as e:
                return None, f"❌ Error: {str(e)}"

        else:
            status_msg += "\n🎬 Creating video...\n"

            if has_reddit_template:
                filter_complex = (
                    f"[0:v]ass={subtitle_escaped}[bg];"
                    f"[1:v]scale={video_width}*{VIDEO_CONFIG['reddit_scale_percent']}:-1[reddit];"
                    f"[bg][reddit]overlay=(W-w)/2:(H-h)/2:enable='between(t,{first_sub_start},{first_sub_end})'[v]"
                )
                cmd = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path,
                    "-loop", "1", "-i", reddit_card_path, "-i", audio_path,
                    "-filter_complex", filter_complex, "-map", "[v]", "-map", "2:a",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                    "-shortest", "-y", output_path
                ]
            else:
                cmd = [
                    "ffmpeg", "-stream_loop", "-1", "-i", video_path, "-i", audio_path,
                    "-vf", f"ass={subtitle_escaped}", "-map", "0:v", "-map", "1:a",
                    "-c:v", "libx264", "-crf", str(crf_quality), "-c:a", "aac",
                    "-shortest", "-y", output_path
                ]

            try:
                subprocess.run(cmd, check=True, capture_output=True, text=True, env=ffmpeg_env)
            except subprocess.CalledProcessError as e:
                return None, f"❌ FFmpeg error:\n{e.stderr[-1000:] if e.stderr else str(e)}"

        if os.path.exists(output_path):
            file_size = os.path.getsize(output_path) / (1024 * 1024)
            success_msg = "✅ Video created successfully!\n\n"
            success_msg += f"📊 Size: {file_size:.2f} MB | Duration: {audio_duration:.2f}s\n"
            success_msg += f"🎨 Quality: CRF {crf_quality} | FPS: {video_fps:.2f}\n"
            if has_reddit_template:
                success_msg += f"📱 Reddit: ✅ ({first_sub_start:.1f}-{first_sub_end:.1f}s)\n"
            if has_book_cover:
                # BUG FIX: percentages were hard-coded as 60/75/90 and
                # disagreed with VIDEO_CONFIG (70/83/90); report the
                # configured values instead.
                fade_pct = round(VIDEO_CONFIG['fade_start_percent'] * 100)
                hold_pct = round(VIDEO_CONFIG['fade_end_percent'] * 100)
                book_pct = round((1 - VIDEO_CONFIG['promo_percent']) * 100)
                success_msg += f"📚 Book: ✅ (Fade: {fade_pct}→{hold_pct}%, Hold: {hold_pct}→{book_pct}%, Book: {book_pct}→100%)\n"
            success_msg += "\n" + status_msg
            return output_path, success_msg
        else:
            return None, "❌ Output file was not created"

    except Exception as e:
        return None, f"❌ Error: {str(e)}"
643
+
644
+
645
# ========================================
# FastAPI app
# ========================================
app = FastAPI(title="Video Stitcher API")

# NOTE(review): allow_origins=["*"] together with allow_credentials=True is an
# invalid combination per the CORS spec (a wildcard origin cannot be sent with
# credentials); Starlette works around it by echoing the request origin.
# Confirm whether credentialed cross-origin requests are actually required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
657
+
658
+ # ========================================
659
+ # RESPONSE MODELS
660
+ # ========================================
661
+
662
class StitchQueuedResponse(BaseModel):
    """Acknowledgement returned by POST /video_stitch when a job is queued.

    NOTE(review): `example=` on Field is the pydantic-v1 keyword; under
    pydantic v2 it is deprecated in favour of `json_schema_extra` — confirm
    the installed pydantic version.
    """
    job_id: str = Field(..., example="3cecd6e6-6920-474c-b924-aa9f174c0bd0")
    status: str = Field(..., example="queued")
    message: str = Field(..., example="Job queued. Poll /status/{job_id} for updates.")
    status_endpoint: str = Field(..., example="http://0.0.0.0:7860/status/3cecd6e6-6920-474c-b924-aa9f174c0bd0")
    result_endpoint: str = Field(..., example="http://0.0.0.0:7860/result/3cecd6e6-6920-474c-b924-aa9f174c0bd0")
668
+
669
class StitchErrorResponse(BaseModel):
    """Error payload documented for the 500 response of /video_stitch."""
    job_id: str = Field(..., example="3cecd6e6-6920-474c-b924-aa9f174c0bd0")
    status: str = Field(..., example="failed")
    message: str = Field(..., example="❌ Error downloading video from URL: Connection timeout")
    run_time: str = Field(..., example="2m 15s")
674
+
675
class StatusResponse(BaseModel):
    """Progress snapshot returned by GET /status/{job_id}."""
    job_id: str = Field(..., example="3cecd6e6-6920-474c-b924-aa9f174c0bd0")
    status: str = Field(..., example="processing")
    message: str = Field(..., example="Processing files...")
679
+
680
class ResultCompletedResponse(BaseModel):
    """Payload returned by GET /result/{job_id} once a job has completed."""
    job_id: str = Field(..., example="3cecd6e6-6920-474c-b924-aa9f174c0bd0")
    status: str = Field(..., example="completed")
    message: str = Field(..., example="✅ Video created successfully!")
    download_endpoint: str = Field(..., example="http://0.0.0.0:7860/download/3cecd6e6-6920-474c-b924-aa9f174c0bd0")
    result_file: str = Field(..., example="final_20260219_210606.mp4")
    file_ready: bool = Field(..., example=True)
    run_time: str = Field(..., example="5m 42s")

# In-memory job store: job_id -> {'status', 'message', 'result', timing fields}.
# NOTE(review): not shared across worker processes and never pruned — entries
# (and their temp dirs on disk) accumulate for the lifetime of the process.
JOBS: dict = {}
691
+
692
+
693
+ def _save_upload_to_temp(upload_file: UploadFile, temp_dir: str) -> str:
694
+ filename = os.path.basename(upload_file.filename)
695
+ dest_path = os.path.join(temp_dir, filename)
696
+ with open(dest_path, 'wb') as f:
697
+ f.write(upload_file.file.read())
698
+ return dest_path
699
+
700
+
701
def _run_stitch_job(job_id: str, payload: dict):
    """Background worker — runs in a thread pool, not the async event loop.

    Executes stitch_media with the queued payload and records progress,
    result path, and a formatted runtime in the shared JOBS store.
    """
    def _format_runtime(elapsed: float) -> str:
        whole = int(elapsed)
        return f"{whole // 60}m {whole % 60}s"

    try:
        started = time.time()
        JOBS[job_id]['status'] = 'processing'
        JOBS[job_id]['start_time'] = started

        result_path, message = stitch_media(
            payload.get('video_file'), payload.get('video_url'),
            payload.get('audio_file'), payload.get('audio_url'),
            payload.get('subtitle_file'), payload.get('subtitle_url'),
            payload.get('book_id'), payload.get('enable_highlight', True),
            payload.get('highlight_color', 'yellow'), payload.get('font_size', 18),
            payload.get('crf_quality', 23)
        )

        finished = time.time()
        outcome = {
            'end_time': finished,
            'run_time': _format_runtime(finished - started),
        }
        if result_path:
            outcome.update(status='completed', result=result_path, message=message)
        else:
            outcome.update(status='failed', message=message)
        JOBS[job_id].update(outcome)
    except Exception as e:
        finished = time.time()
        elapsed = finished - JOBS[job_id].get('start_time', finished)
        JOBS[job_id].update({
            'status': 'failed',
            'message': str(e),
            'end_time': finished,
            'run_time': _format_runtime(elapsed),
        })
745
+
746
+
747
@app.post(
    '/video_stitch',
    response_model=StitchQueuedResponse,
    responses={500: {"model": StitchErrorResponse}},
    summary="Submit a video stitching job",
    description=(
        "Accepts video, audio, and subtitle inputs (as file uploads or URLs). "
        "Returns a job_id immediately. Poll `/status/{job_id}` to track progress, "
        "then use `/result/{job_id}` or `/download/{job_id}` when complete."
    ),
)
async def stitch_upload(
    request: Request,
    video_file: Optional[UploadFile] = File(None),
    audio_file: Optional[UploadFile] = File(None),
    subtitle_file: Optional[UploadFile] = File(None),
    video_url: Optional[str] = Form(None),
    audio_url: Optional[str] = Form(None),
    subtitle_url: Optional[str] = Form(None),
    book_id: Optional[str] = Form(None),
    enable_highlight: bool = Form(True),
    highlight_color: str = Form('yellow'),
    # NOTE(review): default 12 here but the worker's fallback is 18; this value
    # is always sent so 12 wins — confirm which default is intended.
    font_size: int = Form(12),
    crf_quality: int = Form(23),
):
    """Queue a stitching job and return its id without waiting for the work.

    Uploaded files are spooled into a fresh temp directory so the background
    worker can read them after this request has returned. The heavy lifting
    happens in `_run_stitch_job` on the thread-pool executor; callers poll
    `/status/{job_id}` and fetch output via `/result/{job_id}`.

    Raises:
        HTTPException(500): if saving the uploads or queueing the job fails.
    """
    import shutil  # local import: only needed for failure cleanup

    temp_dir = tempfile.mkdtemp()
    payload = {
        'video_file': None,
        'audio_file': None,
        'subtitle_file': None,
        'video_url': video_url,
        'audio_url': audio_url,
        'subtitle_url': subtitle_url,
        'book_id': book_id,
        'enable_highlight': enable_highlight,
        'highlight_color': highlight_color,
        'font_size': font_size,
        'crf_quality': crf_quality,
    }

    try:
        if video_file is not None:
            payload['video_file'] = _save_upload_to_temp(video_file, temp_dir)
        if audio_file is not None:
            payload['audio_file'] = _save_upload_to_temp(audio_file, temp_dir)
        if subtitle_file is not None:
            payload['subtitle_file'] = _save_upload_to_temp(subtitle_file, temp_dir)

        job_id = str(uuid.uuid4())
        # Register the job before submitting so /status never 404s on a live id.
        JOBS[job_id] = {'status': 'queued', 'message': 'Job queued, waiting to start.', 'result': None}

        # Submit to a background thread — the request returns immediately,
        # avoiding reverse-proxy timeouts on long encodes.
        executor.submit(_run_stitch_job, job_id, payload)

        base_url = str(request.base_url).rstrip('/')
        return JSONResponse({
            'job_id': job_id,
            'status': 'queued',
            'message': 'Job queued. Poll /status/{job_id} for updates.',
            'status_endpoint': f"{base_url}/status/{job_id}",
            'result_endpoint': f"{base_url}/result/{job_id}",
        })

    except Exception as e:
        # The job never made it onto the queue — reclaim the temp directory
        # instead of leaking it (previously left behind on every failure).
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise HTTPException(status_code=500, detail=str(e))
812
+
813
+
814
@app.get('/status/{job_id}', response_model=StatusResponse, summary="Check job status")
async def job_status(job_id: str):
    """Report the lifecycle state of a previously submitted stitching job.

    Returns 404 for unknown job ids; otherwise echoes the job's current
    'status' and latest 'message' from the in-memory store.
    """
    if not (job := JOBS.get(job_id)):
        raise HTTPException(status_code=404, detail='Job not found')
    return {
        'job_id': job_id,
        'status': job['status'],
        'message': job.get('message', ''),
    }
820
+
821
+
822
@app.get(
    '/result/{job_id}',
    responses={
        200: {"model": ResultCompletedResponse, "description": "Job completed — includes download link"},
        202: {"description": "Job still processing"},
        404: {"description": "Job not found"},
    },
    summary="Get job result (includes download link when complete)",
)
async def job_result(job_id: str, request: Request):
    """Return the job outcome; completed jobs include download metadata.

    Status codes: 200 for terminal states (completed or failed), 202 while
    the job is still queued/processing, 404 for unknown ids.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(status_code=404, detail='Job not found')

    state = job['status']
    body: dict = {
        'job_id': job_id,
        'status': state,
        'message': job.get('message', ''),
    }

    in_flight = state in ('queued', 'processing')
    if state == 'completed' and job.get('result'):
        # Enrich terminal-success responses with download details.
        root = str(request.base_url).rstrip('/')
        body['download_endpoint'] = f"{root}/download/{job_id}"
        body['result_file'] = os.path.basename(job['result'])
        body['file_ready'] = True
        body['run_time'] = job.get('run_time', 'N/A')

    return JSONResponse(body, status_code=202 if in_flight else 200)
855
+
856
+
857
@app.get('/download/{job_id}', summary="Download the completed video")
async def download_result(job_id: str):
    """Stream the finished MP4 for a completed job.

    404 for unknown ids; 400 while the job has not produced a result yet.
    """
    job = JOBS.get(job_id)
    if not job:
        raise HTTPException(status_code=404, detail='Job not found')

    ready = job['status'] == 'completed' and job.get('result')
    if not ready:
        raise HTTPException(status_code=400, detail='Result not yet available')

    output_path = job['result']
    return FileResponse(
        output_path,
        media_type='video/mp4',
        filename=os.path.basename(output_path),
    )
869
+
870
+
871
@app.get('/health', summary="Health check")
async def health():
    """Liveness probe; also reports how many job records are held in memory."""
    return {"status": "ok", "jobs_in_memory": len(JOBS)}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn[standard]==0.30.6
3
+ static-ffmpeg==2.5
4
+ Pillow==10.4.0
5
+ requests==2.32.3
6
+ aiofiles==23.2.1
7
+ python-multipart==0.0.9
8
+ pydantic==2.8.2