Spaces:
Running
Running
Update server.py
Browse files
server.py
CHANGED
|
@@ -87,7 +87,7 @@ def apply_color_grading_wedding_retro(frame: np.ndarray) -> np.ndarray:
|
|
| 87 |
lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
|
| 88 |
l_channel, a_channel, b_channel = cv2.split(lab)
|
| 89 |
|
| 90 |
-
# 1. VINTAGE/RETRO
|
| 91 |
a_channel = cv2.add(a_channel, 5)
|
| 92 |
b_channel = cv2.add(b_channel, 8)
|
| 93 |
|
|
@@ -126,12 +126,13 @@ def apply_color_grading_wedding_retro(frame: np.ndarray) -> np.ndarray:
|
|
| 126 |
return np.clip(frame, 0, 255).astype(np.uint8)
|
| 127 |
|
| 128 |
|
| 129 |
-
def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int =
|
| 130 |
-
"""Burn caption text onto frame
|
| 131 |
height, width = frame.shape[:2]
|
| 132 |
|
| 133 |
-
frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 134 |
-
|
|
|
|
| 135 |
|
| 136 |
try:
|
| 137 |
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
|
|
@@ -139,7 +140,7 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 32) ->
|
|
| 139 |
font = ImageFont.load_default()
|
| 140 |
|
| 141 |
# Word-wrap text
|
| 142 |
-
max_width = width -
|
| 143 |
wrapped_lines = []
|
| 144 |
words = text.split()
|
| 145 |
current_line = []
|
|
@@ -156,29 +157,26 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 32) ->
|
|
| 156 |
if current_line:
|
| 157 |
wrapped_lines.append(' '.join(current_line))
|
| 158 |
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
text_height = len(wrapped_lines) * line_height + 20
|
| 162 |
-
bg_y_start = max(height // 2 - text_height // 2 - 10, 20)
|
| 163 |
-
bg_y_end = min(bg_y_start + text_height, height - 20)
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
)
|
| 171 |
-
frame_pil = Image.alpha_composite(frame_pil.convert('RGBA'), overlay).convert('RGB')
|
| 172 |
-
draw = ImageDraw.Draw(frame_pil)
|
| 173 |
-
|
| 174 |
-
y_position = bg_y_start + 10
|
| 175 |
-
for line in wrapped_lines:
|
| 176 |
bbox = draw.textbbox((0, 0), line, font=font)
|
| 177 |
line_width = bbox[2] - bbox[0]
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
|
|
|
| 182 |
return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
|
| 183 |
|
| 184 |
|
|
@@ -191,8 +189,8 @@ def process_video_segment(
|
|
| 191 |
target_width: int = 1080,
|
| 192 |
target_height: int = 1350
|
| 193 |
) -> bool:
|
| 194 |
-
"""Process video segment: crop, resize, color grade, burn captions, encode via FFmpeg."""
|
| 195 |
-
|
| 196 |
try:
|
| 197 |
print(f"Opening video: {video_path}")
|
| 198 |
cap = cv2.VideoCapture(video_path)
|
|
@@ -212,8 +210,10 @@ def process_video_segment(
|
|
| 212 |
print(f"Video info: {fps} fps, {original_width}x{original_height}")
|
| 213 |
print(f"Extracting segment: {start_time} to {end_time} ({duration:.1f}s)")
|
| 214 |
|
| 215 |
-
#
|
| 216 |
-
|
|
|
|
|
|
|
| 217 |
"ffmpeg", "-y",
|
| 218 |
"-f", "rawvideo",
|
| 219 |
"-vcodec", "rawvideo",
|
|
@@ -223,14 +223,13 @@ def process_video_segment(
|
|
| 223 |
"-i", "pipe:0",
|
| 224 |
"-vcodec", "libx264",
|
| 225 |
"-preset", "fast",
|
| 226 |
-
"-crf", "23",
|
| 227 |
-
"-pix_fmt", "yuv420p",
|
| 228 |
-
|
| 229 |
-
output_path
|
| 230 |
]
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
stdin=subprocess.PIPE,
|
| 235 |
stdout=subprocess.DEVNULL,
|
| 236 |
stderr=subprocess.DEVNULL
|
|
@@ -278,32 +277,69 @@ def process_video_segment(
|
|
| 278 |
if current_caption:
|
| 279 |
frame = burn_captions_to_frame(frame, current_caption)
|
| 280 |
|
| 281 |
-
|
| 282 |
processed_frames += 1
|
| 283 |
|
| 284 |
if processed_frames % max(1, target_frames // 10) == 0:
|
| 285 |
progress = (processed_frames / target_frames) * 100
|
| 286 |
print(f"Progress: {progress:.1f}%")
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
cap.release()
|
| 291 |
|
| 292 |
-
if
|
| 293 |
-
print(f"β FFmpeg encoding failed with return code {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
return False
|
| 295 |
|
| 296 |
-
print(f"β Video segment saved: {output_path}")
|
| 297 |
return True
|
| 298 |
|
| 299 |
except Exception as e:
|
| 300 |
print(f"β Error processing video segment: {e}")
|
| 301 |
-
if
|
| 302 |
try:
|
| 303 |
-
|
| 304 |
except Exception:
|
| 305 |
pass
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
return False
|
| 308 |
|
| 309 |
|
|
@@ -546,4 +582,4 @@ async def trigger_processing():
|
|
| 546 |
if __name__ == "__main__":
|
| 547 |
print("Starting Video Processing Service on port 7860...")
|
| 548 |
print("Processing will begin 3 minutes after startup")
|
| 549 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 87 |
lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
|
| 88 |
l_channel, a_channel, b_channel = cv2.split(lab)
|
| 89 |
|
| 90 |
+
# 1. VINTAGE/RETRO: warm tones
|
| 91 |
a_channel = cv2.add(a_channel, 5)
|
| 92 |
b_channel = cv2.add(b_channel, 8)
|
| 93 |
|
|
|
|
| 126 |
return np.clip(frame, 0, 255).astype(np.uint8)
|
| 127 |
|
| 128 |
|
| 129 |
+
def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 36) -> np.ndarray:
|
| 130 |
+
"""Burn caption text onto frame — shadow only, no background, positioned near bottom."""
|
| 131 |
height, width = frame.shape[:2]
|
| 132 |
|
| 133 |
+
frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).convert('RGBA')
|
| 134 |
+
overlay = Image.new('RGBA', frame_pil.size, (0, 0, 0, 0))
|
| 135 |
+
draw = ImageDraw.Draw(overlay)
|
| 136 |
|
| 137 |
try:
|
| 138 |
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
|
|
|
|
| 140 |
font = ImageFont.load_default()
|
| 141 |
|
| 142 |
# Word-wrap text
|
| 143 |
+
max_width = width - 80
|
| 144 |
wrapped_lines = []
|
| 145 |
words = text.split()
|
| 146 |
current_line = []
|
|
|
|
| 157 |
if current_line:
|
| 158 |
wrapped_lines.append(' '.join(current_line))
|
| 159 |
|
| 160 |
+
line_height = font_size + 12
|
| 161 |
+
total_text_height = len(wrapped_lines) * line_height
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
+
# Position: 80% down the frame (near bottom, not center)
|
| 164 |
+
y_start = int(height * 0.80) - total_text_height // 2
|
| 165 |
+
|
| 166 |
+
shadow_offset = 3
|
| 167 |
+
|
| 168 |
+
for i, line in enumerate(wrapped_lines):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
bbox = draw.textbbox((0, 0), line, font=font)
|
| 170 |
line_width = bbox[2] - bbox[0]
|
| 171 |
+
x = (width - line_width) // 2
|
| 172 |
+
y = y_start + i * line_height
|
| 173 |
+
|
| 174 |
+
# Draw shadow (dark, slightly offset)
|
| 175 |
+
draw.text((x + shadow_offset, y + shadow_offset), line, font=font, fill=(0, 0, 0, 200))
|
| 176 |
+
# Draw main white text
|
| 177 |
+
draw.text((x, y), line, font=font, fill=(255, 255, 255, 255))
|
| 178 |
|
| 179 |
+
frame_pil = Image.alpha_composite(frame_pil, overlay).convert('RGB')
|
| 180 |
return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
|
| 181 |
|
| 182 |
|
|
|
|
| 189 |
target_width: int = 1080,
|
| 190 |
target_height: int = 1350
|
| 191 |
) -> bool:
|
| 192 |
+
"""Process video segment: crop, resize, color grade, burn captions, encode with audio via FFmpeg."""
|
| 193 |
+
ffmpeg_video_proc = None
|
| 194 |
try:
|
| 195 |
print(f"Opening video: {video_path}")
|
| 196 |
cap = cv2.VideoCapture(video_path)
|
|
|
|
| 210 |
print(f"Video info: {fps} fps, {original_width}x{original_height}")
|
| 211 |
print(f"Extracting segment: {start_time} to {end_time} ({duration:.1f}s)")
|
| 212 |
|
| 213 |
+
# Step 1: Write processed frames to a temp video-only file
|
| 214 |
+
temp_video_path = output_path.replace(".mp4", "_noaudio.mp4")
|
| 215 |
+
|
| 216 |
+
ffmpeg_video_cmd = [
|
| 217 |
"ffmpeg", "-y",
|
| 218 |
"-f", "rawvideo",
|
| 219 |
"-vcodec", "rawvideo",
|
|
|
|
| 223 |
"-i", "pipe:0",
|
| 224 |
"-vcodec", "libx264",
|
| 225 |
"-preset", "fast",
|
| 226 |
+
"-crf", "23",
|
| 227 |
+
"-pix_fmt", "yuv420p",
|
| 228 |
+
temp_video_path
|
|
|
|
| 229 |
]
|
| 230 |
|
| 231 |
+
ffmpeg_video_proc = subprocess.Popen(
|
| 232 |
+
ffmpeg_video_cmd,
|
| 233 |
stdin=subprocess.PIPE,
|
| 234 |
stdout=subprocess.DEVNULL,
|
| 235 |
stderr=subprocess.DEVNULL
|
|
|
|
| 277 |
if current_caption:
|
| 278 |
frame = burn_captions_to_frame(frame, current_caption)
|
| 279 |
|
| 280 |
+
ffmpeg_video_proc.stdin.write(frame.tobytes())
|
| 281 |
processed_frames += 1
|
| 282 |
|
| 283 |
if processed_frames % max(1, target_frames // 10) == 0:
|
| 284 |
progress = (processed_frames / target_frames) * 100
|
| 285 |
print(f"Progress: {progress:.1f}%")
|
| 286 |
|
| 287 |
+
ffmpeg_video_proc.stdin.close()
|
| 288 |
+
ffmpeg_video_proc.wait()
|
| 289 |
cap.release()
|
| 290 |
|
| 291 |
+
if ffmpeg_video_proc.returncode != 0:
|
| 292 |
+
print(f"β FFmpeg video encoding failed with return code {ffmpeg_video_proc.returncode}")
|
| 293 |
+
return False
|
| 294 |
+
|
| 295 |
+
print("β Video frames encoded, muxing audio...")
|
| 296 |
+
|
| 297 |
+
# Step 2: Mux processed video with audio extracted directly from source
|
| 298 |
+
ffmpeg_mux_cmd = [
|
| 299 |
+
"ffmpeg", "-y",
|
| 300 |
+
"-i", temp_video_path, # processed video (no audio)
|
| 301 |
+
"-ss", str(start_seconds), # seek audio to segment start
|
| 302 |
+
"-to", str(end_seconds), # audio end point
|
| 303 |
+
"-i", video_path, # original source for audio
|
| 304 |
+
"-map", "0:v:0", # video from processed file
|
| 305 |
+
"-map", "1:a:0", # audio from original source
|
| 306 |
+
"-c:v", "copy", # don't re-encode video
|
| 307 |
+
"-c:a", "aac", # encode audio to AAC
|
| 308 |
+
"-b:a", "192k",
|
| 309 |
+
"-shortest",
|
| 310 |
+
"-movflags", "+faststart",
|
| 311 |
+
output_path
|
| 312 |
+
]
|
| 313 |
+
|
| 314 |
+
mux_result = subprocess.run(
|
| 315 |
+
ffmpeg_mux_cmd,
|
| 316 |
+
stdout=subprocess.DEVNULL,
|
| 317 |
+
stderr=subprocess.DEVNULL
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
+
# Clean up temp video file
|
| 321 |
+
if os.path.exists(temp_video_path):
|
| 322 |
+
os.remove(temp_video_path)
|
| 323 |
+
|
| 324 |
+
if mux_result.returncode != 0:
|
| 325 |
+
print(f"β FFmpeg audio mux failed with return code {mux_result.returncode}")
|
| 326 |
return False
|
| 327 |
|
| 328 |
+
print(f"β Video segment with audio saved: {output_path}")
|
| 329 |
return True
|
| 330 |
|
| 331 |
except Exception as e:
|
| 332 |
print(f"β Error processing video segment: {e}")
|
| 333 |
+
if ffmpeg_video_proc is not None:
|
| 334 |
try:
|
| 335 |
+
ffmpeg_video_proc.stdin.close()
|
| 336 |
except Exception:
|
| 337 |
pass
|
| 338 |
+
ffmpeg_video_proc.wait()
|
| 339 |
+
# Clean up temp file if it exists
|
| 340 |
+
temp_video_path = output_path.replace(".mp4", "_noaudio.mp4")
|
| 341 |
+
if os.path.exists(temp_video_path):
|
| 342 |
+
os.remove(temp_video_path)
|
| 343 |
return False
|
| 344 |
|
| 345 |
|
|
|
|
| 582 |
if __name__ == "__main__":
|
| 583 |
print("Starting Video Processing Service on port 7860...")
|
| 584 |
print("Processing will begin 3 minutes after startup")
|
| 585 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|