Spaces:

factorstudios
/

segment

Running

App Files Community

factorstudios committed 1 day ago

Commit

70e55d5

verified ·

1 Parent(s): de435e7

Update server.py

Browse files

Files changed (1) hide show

server.py +81 -45

server.py CHANGED Viewed

@@ -87,7 +87,7 @@ def apply_color_grading_wedding_retro(frame: np.ndarray) -> np.ndarray:
     lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
     l_channel, a_channel, b_channel = cv2.split(lab)
-    # 1. VINTAGE/RETRO EFFECT: warm tones
     a_channel = cv2.add(a_channel, 5)
     b_channel = cv2.add(b_channel, 8)
@@ -126,12 +126,13 @@ def apply_color_grading_wedding_retro(frame: np.ndarray) -> np.ndarray:
     return np.clip(frame, 0, 255).astype(np.uint8)
-def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 32) -> np.ndarray:
-    """Burn caption text onto frame with semi-transparent background (centered)."""
     height, width = frame.shape[:2]
-    frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-    draw = ImageDraw.Draw(frame_pil, 'RGBA')
     try:
         font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
@@ -139,7 +140,7 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 32) ->
         font = ImageFont.load_default()
     # Word-wrap text
-    max_width = width - 60
     wrapped_lines = []
     words = text.split()
     current_line = []
@@ -156,29 +157,26 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 32) ->
     if current_line:
         wrapped_lines.append(' '.join(current_line))
-    # Background box dimensions
-    line_height = font_size + 10
-    text_height = len(wrapped_lines) * line_height + 20
-    bg_y_start = max(height // 2 - text_height // 2 - 10, 20)
-    bg_y_end = min(bg_y_start + text_height, height - 20)
-    overlay = Image.new('RGBA', frame_pil.size, (0, 0, 0, 0))
-    overlay_draw = ImageDraw.Draw(overlay, 'RGBA')
-    overlay_draw.rectangle(
-        [(20, bg_y_start), (width - 20, bg_y_end)],
-        fill=(0, 0, 0, 180)
-    )
-    frame_pil = Image.alpha_composite(frame_pil.convert('RGBA'), overlay).convert('RGB')
-    draw = ImageDraw.Draw(frame_pil)
-    y_position = bg_y_start + 10
-    for line in wrapped_lines:
         bbox = draw.textbbox((0, 0), line, font=font)
         line_width = bbox[2] - bbox[0]
-        x_position = (width - line_width) // 2
-        draw.text((x_position, y_position), line, font=font, fill=(255, 255, 255, 255))
-        y_position += line_height
     return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
@@ -191,8 +189,8 @@ def process_video_segment(
     target_width: int = 1080,
     target_height: int = 1350
 ) -> bool:
-    """Process video segment: crop, resize, color grade, burn captions, encode via FFmpeg."""
-    ffmpeg_proc = None
     try:
         print(f"Opening video: {video_path}")
         cap = cv2.VideoCapture(video_path)
@@ -212,8 +210,10 @@ def process_video_segment(
         print(f"Video info: {fps} fps, {original_width}x{original_height}")
         print(f"Extracting segment: {start_time} to {end_time} ({duration:.1f}s)")
-        # Pipe frames into FFmpeg — proper H.264 with real compression
-        ffmpeg_cmd = [
             "ffmpeg", "-y",
             "-f", "rawvideo",
             "-vcodec", "rawvideo",
@@ -223,14 +223,13 @@ def process_video_segment(
             "-i", "pipe:0",
             "-vcodec", "libx264",
             "-preset", "fast",
-            "-crf", "23",           # 0=lossless, 51=worst; 23 is a solid default
-            "-pix_fmt", "yuv420p",  # broad playback compatibility
-            "-movflags", "+faststart",
-            output_path
         ]
-        ffmpeg_proc = subprocess.Popen(
-            ffmpeg_cmd,
             stdin=subprocess.PIPE,
             stdout=subprocess.DEVNULL,
             stderr=subprocess.DEVNULL
@@ -278,32 +277,69 @@ def process_video_segment(
             if current_caption:
                 frame = burn_captions_to_frame(frame, current_caption)
-            ffmpeg_proc.stdin.write(frame.tobytes())
             processed_frames += 1
             if processed_frames % max(1, target_frames // 10) == 0:
                 progress = (processed_frames / target_frames) * 100
                 print(f"Progress: {progress:.1f}%")
-        ffmpeg_proc.stdin.close()
-        ffmpeg_proc.wait()
         cap.release()
-        if ffmpeg_proc.returncode != 0:
-            print(f"✗ FFmpeg encoding failed with return code {ffmpeg_proc.returncode}")
             return False
-        print(f"✓ Video segment saved: {output_path}")
         return True
     except Exception as e:
         print(f"✗ Error processing video segment: {e}")
-        if ffmpeg_proc is not None:
             try:
-                ffmpeg_proc.stdin.close()
             except Exception:
                 pass
-            ffmpeg_proc.wait()
         return False
@@ -546,4 +582,4 @@ async def trigger_processing():
 if __name__ == "__main__":
     print("Starting Video Processing Service on port 7860...")
     print("Processing will begin 3 minutes after startup")
-    uvicorn.run(app, host="0.0.0.0", port=7860)

     lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
     l_channel, a_channel, b_channel = cv2.split(lab)
+    # 1. VINTAGE/RETRO: warm tones
     a_channel = cv2.add(a_channel, 5)
     b_channel = cv2.add(b_channel, 8)
     return np.clip(frame, 0, 255).astype(np.uint8)
+def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 36) -> np.ndarray:
+    """Burn caption text onto frame — shadow only, no background, positioned near bottom."""
     height, width = frame.shape[:2]
+    frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).convert('RGBA')
+    overlay = Image.new('RGBA', frame_pil.size, (0, 0, 0, 0))
+    draw = ImageDraw.Draw(overlay)
     try:
         font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
         font = ImageFont.load_default()
     # Word-wrap text
+    max_width = width - 80
     wrapped_lines = []
     words = text.split()
     current_line = []
     if current_line:
         wrapped_lines.append(' '.join(current_line))
+    line_height = font_size + 12
+    total_text_height = len(wrapped_lines) * line_height
+    # Position: 80% down the frame (near bottom, not center)
+    y_start = int(height * 0.80) - total_text_height // 2
+    shadow_offset = 3
+    for i, line in enumerate(wrapped_lines):
         bbox = draw.textbbox((0, 0), line, font=font)
         line_width = bbox[2] - bbox[0]
+        x = (width - line_width) // 2
+        y = y_start + i * line_height
+        # Draw shadow (dark, slightly offset)
+        draw.text((x + shadow_offset, y + shadow_offset), line, font=font, fill=(0, 0, 0, 200))
+        # Draw main white text
+        draw.text((x, y), line, font=font, fill=(255, 255, 255, 255))
+    frame_pil = Image.alpha_composite(frame_pil, overlay).convert('RGB')
     return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
     target_width: int = 1080,
     target_height: int = 1350
 ) -> bool:
+    """Process video segment: crop, resize, color grade, burn captions, encode with audio via FFmpeg."""
+    ffmpeg_video_proc = None
     try:
         print(f"Opening video: {video_path}")
         cap = cv2.VideoCapture(video_path)
         print(f"Video info: {fps} fps, {original_width}x{original_height}")
         print(f"Extracting segment: {start_time} to {end_time} ({duration:.1f}s)")
+        # Step 1: Write processed frames to a temp video-only file
+        temp_video_path = output_path.replace(".mp4", "_noaudio.mp4")
+        ffmpeg_video_cmd = [
             "ffmpeg", "-y",
             "-f", "rawvideo",
             "-vcodec", "rawvideo",
             "-i", "pipe:0",
             "-vcodec", "libx264",
             "-preset", "fast",
+            "-crf", "23",
+            "-pix_fmt", "yuv420p",
+            temp_video_path
         ]
+        ffmpeg_video_proc = subprocess.Popen(
+            ffmpeg_video_cmd,
             stdin=subprocess.PIPE,
             stdout=subprocess.DEVNULL,
             stderr=subprocess.DEVNULL
             if current_caption:
                 frame = burn_captions_to_frame(frame, current_caption)
+            ffmpeg_video_proc.stdin.write(frame.tobytes())
             processed_frames += 1
             if processed_frames % max(1, target_frames // 10) == 0:
                 progress = (processed_frames / target_frames) * 100
                 print(f"Progress: {progress:.1f}%")
+        ffmpeg_video_proc.stdin.close()
+        ffmpeg_video_proc.wait()
         cap.release()
+        if ffmpeg_video_proc.returncode != 0:
+            print(f"✗ FFmpeg video encoding failed with return code {ffmpeg_video_proc.returncode}")
+            return False
+        print("✓ Video frames encoded, muxing audio...")
+        # Step 2: Mux processed video with audio extracted directly from source
+        ffmpeg_mux_cmd = [
+            "ffmpeg", "-y",
+            "-i", temp_video_path,                  # processed video (no audio)
+            "-ss", str(start_seconds),               # seek audio to segment start
+            "-to", str(end_seconds),                 # audio end point
+            "-i", video_path,                        # original source for audio
+            "-map", "0:v:0",                         # video from processed file
+            "-map", "1:a:0",                         # audio from original source
+            "-c:v", "copy",                          # don't re-encode video
+            "-c:a", "aac",                           # encode audio to AAC
+            "-b:a", "192k",
+            "-shortest",
+            "-movflags", "+faststart",
+            output_path
+        ]
+        mux_result = subprocess.run(
+            ffmpeg_mux_cmd,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
+        )
+        # Clean up temp video file
+        if os.path.exists(temp_video_path):
+            os.remove(temp_video_path)
+        if mux_result.returncode != 0:
+            print(f"✗ FFmpeg audio mux failed with return code {mux_result.returncode}")
             return False
+        print(f"✓ Video segment with audio saved: {output_path}")
         return True
     except Exception as e:
         print(f"✗ Error processing video segment: {e}")
+        if ffmpeg_video_proc is not None:
             try:
+                ffmpeg_video_proc.stdin.close()
             except Exception:
                 pass
+            ffmpeg_video_proc.wait()
+        # Clean up temp file if it exists
+        temp_video_path = output_path.replace(".mp4", "_noaudio.mp4")
+        if os.path.exists(temp_video_path):
+            os.remove(temp_video_path)
         return False
 if __name__ == "__main__":
     print("Starting Video Processing Service on port 7860...")
     print("Processing will begin 3 minutes after startup")
+    uvicorn.run(app, host="0.0.0.0", port=7860)