Spaces:

factorstudios
/

segment

Running

App Files Files Community

factorstudios committed 1 day ago

Commit

cb2ab9a

verified ·

1 Parent(s): a0dce99

Create server.py

Browse files

Files changed (1) hide show

server.py +542 -0

server.py ADDED Viewed

	@@ -0,0 +1,542 @@

#!/usr/bin/env python3
"""FastAPI service that renders captioned, colour-graded video segments.

The module reads segment descriptors, transcripts and source videos from the
Hugging Face dataset repo named in ``HF_DATASET_REPO`` and uploads the
rendered clips back to the same repo.  Importing it has side effects: it
loads ``.env``, requires ``HF_TOKEN`` to be set, and terminates the process
with exit status 1 when a dependency or the token is missing.
"""
import os
import json
import re
import asyncio
import tempfile
import subprocess
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
from typing import List, Dict, Optional
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
import uvicorn
try:
    from huggingface_hub import list_repo_files, hf_hub_download, upload_file
    import cv2
    import numpy as np
    from PIL import Image, ImageDraw, ImageFont
except ImportError as e:
    print(f"Missing dependency: {e}")
    # ``exit`` is a helper injected by the ``site`` module for interactive
    # use and may be absent (e.g. ``python -S``); raise SystemExit directly.
    raise SystemExit(1)
# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    print("Error: Missing HF_TOKEN in .env")
    raise SystemExit(1)
app = FastAPI(title="Video Processing Service")
# Global state shared by the background processing task and the HTTP endpoints
processing_state = {
    "is_running": False,       # True while a scan/processing pass is active
    "total_processed": 0,      # number of movies counted as processed
    "current_file": None,      # movie name currently being processed
    "error_count": 0,          # running count of recorded errors
    "last_error": None,        # string form of the most recent recorded error
    "processed_files": []      # movie names that have been processed
}
# Dataset repo and the folder names used inside it
HF_DATASET_REPO = "factorstudios/movs"
HOOKS_FOLDER = "hooks"
READY_VIDEOS_FOLDER = "ready_videos"
TRANSCRIPTION_FOLDER = "transcriptions"
def timestamp_to_seconds(timestamp: str) -> float:
    """Convert a clock-style timestamp to a number of seconds.

    Accepted forms are ``HH:MM:SS``, ``MM:SS`` and ``SS``.  Each field may
    carry a fractional part, written with either ``.`` or ``,`` as the
    decimal separator (e.g. ``00:01:02.5``).

    Args:
        timestamp: The timestamp text to parse.

    Returns:
        The total number of seconds as a float.  On any parse failure
        (non-string input, empty string, non-numeric field, more than three
        fields) the error is printed and ``0.0`` is returned.
    """
    try:
        fields = timestamp.strip().replace(",", ".").split(":")
        if len(fields) > 3:
            raise ValueError("expected at most three ':'-separated fields")
        total = 0.0
        # Horner-style accumulation: each further field is worth 1/60 of the
        # previous one, so the same loop handles SS, MM:SS and HH:MM:SS.
        for field in fields:
            total = total * 60 + float(field)
        return total
    except (AttributeError, TypeError, ValueError) as e:
        print(f"Error converting timestamp {timestamp}: {e}")
        return 0.0
def extract_captions_for_segment(transcript_content: str, start_time: str, end_time: str) -> List[tuple]:
    """Collect the transcript lines whose timestamp lies inside a segment.

    Transcript lines are expected to look like ``[HH:MM:SS] text``; lines
    that do not match that shape are ignored.  Both segment bounds are
    inclusive.

    Returns:
        A list of ``(relative_time, text)`` tuples, where ``relative_time``
        is the line's offset in seconds from ``start_time`` and ``text`` is
        the caption with surrounding whitespace stripped.
    """
    window_start = timestamp_to_seconds(start_time)
    window_end = timestamp_to_seconds(end_time)
    line_pattern = re.compile(r'\[(\d{2}):(\d{2}):(\d{2})\]\s+(.*)')
    found = []
    for raw_line in transcript_content.strip().split('\n'):
        parsed = line_pattern.match(raw_line)
        if parsed is None:
            continue
        hh, mm, ss, caption = parsed.groups()
        stamp = int(hh) * 3600 + int(mm) * 60 + int(ss)
        # Skip lines that fall before or after the segment window.
        if stamp < window_start or stamp > window_end:
            continue
        found.append((stamp - window_start, caption.strip()))
    return found
# Single-entry cache for the vignette mask.  The mask depends only on the
# frame size, which is constant across the frames of one rendered segment.
_vignette_mask_cache: Dict[tuple, np.ndarray] = {}


def _vignette_mask(rows: int, cols: int) -> np.ndarray:
    """Return the (rows, cols) vignette multiplier, building it on first use."""
    key = (rows, cols)
    mask = _vignette_mask_cache.get(key)
    if mask is None:
        x_kernel = cv2.getGaussianKernel(cols, cols / 2)
        y_kernel = cv2.getGaussianKernel(rows, rows / 2)
        kernel = y_kernel * x_kernel.T
        mask = kernel / kernel.max()
        mask = mask ** 0.4  # Adjust intensity
        # Keep only the most recent size so memory use stays bounded.
        _vignette_mask_cache.clear()
        _vignette_mask_cache[key] = mask
    return mask


def apply_color_grading_wedding_retro(frame: np.ndarray) -> np.ndarray:
    """Apply cinematic wedding LUT + retro style with high sharpening.

    The frame is treated as a 3-channel BGR image (it is converted with
    ``COLOR_BGR2LAB`` and split into three channels).  The steps are, in
    order: warm shift in LAB, CLAHE on lightness, +30% saturation in HSV,
    contrast/brightness scaling, sharpening blended with the input, and a
    Gaussian vignette.

    Args:
        frame: BGR image to grade.

    Returns:
        The graded image as a ``uint8`` array of the same size.
    """
    # Convert BGR to LAB for better color manipulation
    lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
    l_channel, a_channel, b_channel = cv2.split(lab)
    # 1. VINTAGE/RETRO EFFECT: add warm tones
    a_channel = cv2.add(a_channel, 5)  # Shift towards magenta/red slightly
    b_channel = cv2.add(b_channel, 8)  # Shift towards yellow/warm
    # 2. WEDDING LOOK: local contrast on the lightness channel
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    l_channel = clahe.apply(l_channel)
    lab_enhanced = cv2.merge([l_channel, a_channel, b_channel])
    frame = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
    # 3. SATURATION BOOST: done in float so the scaled values can be
    #    clipped before converting back to 8-bit.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV).astype(np.float32)
    hsv[:, :, 1] = hsv[:, :, 1] * 1.3  # Boost saturation by 30%
    hsv[:, :, 1] = np.clip(hsv[:, :, 1], 0, 255)
    frame = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
    # 4. CONTRAST ENHANCEMENT
    frame = cv2.convertScaleAbs(frame, alpha=1.15, beta=10)
    # 5. HIGH SHARPENING
    kernel = np.array([[-1, -1, -1],
                       [-1,  9, -1],
                       [-1, -1, -1]]) / 1.2
    sharpened = cv2.filter2D(frame, -1, kernel)
    # Blend original with sharpened for natural look
    frame = cv2.addWeighted(frame, 0.4, sharpened, 0.6, 0)
    # 6. SLIGHT VIGNETTE: the mask is cached per frame size instead of
    #    being rebuilt for every frame.
    rows, cols = frame.shape[:2]
    mask = _vignette_mask(rows, cols)
    for i in range(3):  # Apply to each channel
        frame[:, :, i] = frame[:, :, i] * mask
    return np.clip(frame, 0, 255).astype(np.uint8)
_CAPTION_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
# Fonts keyed by size, so the font file is not re-read for every frame.
_caption_font_cache: Dict[int, object] = {}


def _load_caption_font(font_size: int):
    """Return the caption font for ``font_size``, loading it on first use."""
    font = _caption_font_cache.get(font_size)
    if font is None:
        try:
            font = ImageFont.truetype(_CAPTION_FONT_PATH, font_size)
        except (OSError, ImportError):
            # Font file missing/unreadable or FreeType support unavailable:
            # fall back to Pillow's built-in font.
            # NOTE(review): the fallback is loaded without a size, so it may
            # not match ``font_size`` - confirm this is acceptable.
            font = ImageFont.load_default()
        _caption_font_cache[font_size] = font
    return font


def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 32) -> np.ndarray:
    """Burn caption text onto frame with semi-transparent background (centered).

    The text is word-wrapped to the frame width minus a 60 px margin, drawn
    in white over a translucent black band that is vertically centred
    (clamped to stay 20 px inside the frame).

    Args:
        frame: BGR image to draw on.
        text: Caption text.  Blank or whitespace-only text leaves the frame
            unchanged.
        font_size: Font size passed to the TrueType loader; also used to
            derive the line spacing.

    Returns:
        A BGR image with the caption rendered on it.
    """
    words = text.split() if text else []
    if not words:
        # Nothing to render; avoid drawing an empty background band.
        return frame
    height, width = frame.shape[:2]
    # Convert frame to PIL for easier text rendering
    frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(frame_pil, 'RGBA')
    font = _load_caption_font(font_size)
    # Greedy word wrap: start a new line when adding the next word would
    # exceed max_width.  A single over-long word still gets its own line.
    max_width = width - 60
    wrapped_lines = []
    current_line = []
    for word in words:
        test_line = ' '.join(current_line + [word])
        bbox = draw.textbbox((0, 0), test_line, font=font)
        if bbox[2] - bbox[0] > max_width:
            if current_line:
                wrapped_lines.append(' '.join(current_line))
            current_line = [word]
        else:
            current_line.append(word)
    if current_line:
        wrapped_lines.append(' '.join(current_line))
    # Calculate dimensions for background
    line_height = font_size + 10
    text_height = len(wrapped_lines) * line_height + 20
    bg_y_start = max(height // 2 - text_height // 2 - 10, 20)
    bg_y_end = min(bg_y_start + text_height, height - 20)
    # Draw semi-transparent background on a separate RGBA layer and
    # composite it, so the alpha value actually blends with the frame.
    overlay = Image.new('RGBA', frame_pil.size, (0, 0, 0, 0))
    overlay_draw = ImageDraw.Draw(overlay, 'RGBA')
    overlay_draw.rectangle(
        [(20, bg_y_start), (width - 20, bg_y_end)],
        fill=(0, 0, 0, 180)  # Semi-transparent black
    )
    frame_pil = Image.alpha_composite(frame_pil.convert('RGBA'), overlay).convert('RGB')
    draw = ImageDraw.Draw(frame_pil)
    # Draw each line horizontally centred
    y_position = bg_y_start + 10
    for line in wrapped_lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        line_width = bbox[2] - bbox[0]
        x_position = (width - line_width) // 2
        draw.text((x_position, y_position), line, font=font, fill=(255, 255, 255, 255))
        y_position += line_height
    # Convert back to OpenCV format
    return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
def process_video_segment(
    video_path: str,
    output_path: str,
    start_time: str,
    end_time: str,
    captions: List[tuple],
    target_width: int = 1080,
    target_height: int = 1350
) -> bool:
    """Process video segment: resize, cut, add captions, apply color grading.

    The frames between ``start_time`` and ``end_time`` are centre-cropped to
    the target aspect ratio, resized, colour graded, overlaid with the
    active caption, and written to ``output_path`` with the ``mp4v`` codec.

    Args:
        video_path: Path of the source video.
        output_path: Path of the file to write.
        start_time: Segment start timestamp.
        end_time: Segment end timestamp.
        captions: ``(relative_seconds, text)`` pairs; a caption becomes
            active at its frame and stays until the next one replaces it.
        target_width: Output width in pixels.
        target_height: Output height in pixels.

    Returns:
        True if at least one frame was written; False on any failure
        (unopenable input/output, invalid video properties, empty segment
        range, no readable frames, or an unexpected exception).
    """
    cap = None
    out = None
    try:
        print(f"Opening video: {video_path}")
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            return False
        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if fps <= 0 or original_width <= 0 or original_height <= 0:
            print(f"Error: Invalid video properties for {video_path}")
            return False
        start_seconds = timestamp_to_seconds(start_time)
        end_seconds = timestamp_to_seconds(end_time)
        duration = end_seconds - start_seconds
        if duration <= 0:
            print(f"Error: Invalid segment range {start_time} to {end_time}")
            return False
        print(f"Video info: {fps} fps, {original_width}x{original_height}")
        print(f"Extracting segment: {start_time} to {end_time} ({duration} seconds)")
        # Setup video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (target_width, target_height))
        if not out.isOpened():
            print(f"Error: Could not create video writer for {output_path}")
            return False
        # Seek to start time
        start_frame = int(start_seconds * fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        # Map segment-relative frame numbers to the caption starting there
        caption_map = {}
        for rel_time, caption_text in captions:
            caption_map[int(rel_time * fps)] = caption_text
        # Centre-crop geometry depends only on the source/target sizes, so
        # compute it once instead of for every frame.
        aspect_ratio = target_width / target_height
        if original_width / original_height > aspect_ratio:
            # Source is too wide: trim left/right
            new_width = int(original_height * aspect_ratio)
            x_offset = (original_width - new_width) // 2
            row_slice = slice(None)
            col_slice = slice(x_offset, x_offset + new_width)
        else:
            # Source is too tall: trim top/bottom
            new_height = int(original_width / aspect_ratio)
            y_offset = (original_height - new_height) // 2
            row_slice = slice(y_offset, y_offset + new_height)
            col_slice = slice(None)
        current_caption = ""
        processed_frames = 0
        target_frames = int(duration * fps)
        progress_step = max(1, target_frames // 10)
        print(f"Processing {target_frames} frames...")
        while processed_frames < target_frames:
            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Could not read frame at position {processed_frames}")
                break
            frame = frame[row_slice, col_slice]
            frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_LANCZOS4)
            # Apply color grading
            frame = apply_color_grading_wedding_retro(frame)
            # Update caption if needed
            if processed_frames in caption_map:
                current_caption = caption_map[processed_frames]
            # Burn caption
            if current_caption:
                frame = burn_captions_to_frame(frame, current_caption)
            out.write(frame)
            processed_frames += 1
            if processed_frames % progress_step == 0:
                progress = (processed_frames / target_frames) * 100
                print(f"Progress: {progress:.1f}%")
        if processed_frames == 0:
            # An output with no frames is not a usable segment.
            print(f"Error: No frames written for {output_path}")
            return False
        print(f"✓ Video segment saved: {output_path}")
        return True
    except Exception as e:
        print(f"✗ Error processing video segment: {e}")
        return False
    finally:
        # Always release the capture and writer, including on early
        # returns and exceptions, so file handles are not leaked.
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()
async def process_movie_segments(movie_name: str) -> bool:
    """Process all segments for a movie.

    Downloads the movie's transcript (optional) and ``.mkv`` source from the
    dataset repo, then for every ``hooks/<movie_name>/*.json`` descriptor
    renders the segment and uploads it under ``ready_videos/<movie_name>/``.
    Blocking work (hub requests, rendering, uploads) runs in the event
    loop's default executor so the HTTP endpoints stay responsive.

    Failures are recorded in ``processing_state`` (``error_count`` and
    ``last_error``).  A failing segment does not stop the remaining ones.

    Args:
        movie_name: Folder name of the movie under the hooks folder.

    Returns:
        True if at least one segment was rendered and uploaded, else False.
    """
    import shutil

    cache_dir = "/tmp/video_processor_cache"
    loop = asyncio.get_running_loop()

    async def run_blocking(func, *args, **kwargs):
        # Run a blocking callable in the default thread pool.
        return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

    def download(filename: str) -> str:
        # Fetch one file of the dataset repo into the local cache.
        return hf_hub_download(
            repo_id=HF_DATASET_REPO,
            filename=filename,
            repo_type="dataset",
            token=HF_TOKEN,
            cache_dir=cache_dir
        )

    def record_error(message: str) -> None:
        processing_state["error_count"] += 1
        processing_state["last_error"] = message

    try:
        processing_state["current_file"] = movie_name
        print(f"\n{'='*80}")
        print(f"Processing movie: {movie_name}")
        print(f"{'='*80}")
        # Download transcript; a missing transcript only means no captions
        transcript_file = f"{TRANSCRIPTION_FOLDER}/{movie_name}.transcript.txt"
        print(f"Downloading transcript: {transcript_file}")
        try:
            transcript_path = await run_blocking(download, transcript_file)
            with open(transcript_path, 'r', encoding='utf-8') as f:
                transcript_content = f.read()
        except Exception as e:
            print(f"Warning: Could not download transcript: {e}")
            transcript_content = ""
        # Download original video
        video_file = f"{movie_name}.mkv"
        print(f"Downloading video: {video_file}")
        try:
            video_path = await run_blocking(download, video_file)
            # Resolve symlink if needed
            if os.path.islink(video_path):
                video_path = os.path.realpath(video_path)
        except Exception as e:
            print(f"Error: Could not download video: {e}")
            record_error(str(e))
            return False
        # List segment JSON files
        hooks_folder = f"{HOOKS_FOLDER}/{movie_name}"
        print(f"Listing segments from: {hooks_folder}")
        files = await run_blocking(
            list_repo_files,
            repo_id=HF_DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN
        )
        segment_files = sorted(
            f for f in files
            if f.startswith(f"{hooks_folder}/") and f.endswith(".json")
        )
        if not segment_files:
            print(f"No segment JSON files found for {movie_name}")
            return False
        print(f"Found {len(segment_files)} segments")
        # Process each segment
        uploaded = 0
        temp_dir = tempfile.mkdtemp()
        try:
            for segment_file in segment_files:
                try:
                    # Download segment JSON
                    segment_path = await run_blocking(download, segment_file)
                    with open(segment_path, 'r', encoding='utf-8') as f:
                        segment_data = json.load(f)
                    # int() so a numeric string still works with ``:02d``
                    segment_number = int(segment_data.get("segment_number", 1))
                    start_time = segment_data.get("start_time", "00:00:00")
                    end_time = segment_data.get("end_time", "00:10:00")
                    print(f"\nProcessing segment {segment_number}: {start_time} to {end_time}")
                    # Extract captions for this segment
                    captions = extract_captions_for_segment(transcript_content, start_time, end_time)
                    print(f"Found {len(captions)} caption lines for this segment")
                    # Process video
                    output_filename = f"segment-{segment_number:02d}.mp4"
                    output_path = os.path.join(temp_dir, output_filename)
                    success = await run_blocking(
                        process_video_segment,
                        video_path,
                        output_path,
                        start_time,
                        end_time,
                        captions
                    )
                    if not success:
                        print(f"Failed to process segment {segment_number}")
                        record_error(f"Failed to process segment {segment_number} of {movie_name}")
                        continue
                    # Upload to dataset
                    upload_path = f"{READY_VIDEOS_FOLDER}/{movie_name}/{output_filename}"
                    print(f"Uploading to: {upload_path}")
                    await run_blocking(
                        upload_file,
                        path_or_fileobj=output_path,
                        path_in_repo=upload_path,
                        repo_id=HF_DATASET_REPO,
                        repo_type="dataset",
                        token=HF_TOKEN,
                        commit_message=f"Add processed video segment {segment_number} for {movie_name}"
                    )
                    uploaded += 1
                    print(f"✓ Segment {segment_number} uploaded successfully")
                except Exception as e:
                    print(f"✗ Error processing segment: {e}")
                    record_error(str(e))
                    continue
        finally:
            # Best-effort cleanup of the rendered files
            shutil.rmtree(temp_dir, ignore_errors=True)
        if uploaded == 0:
            # Do not report a movie as processed when nothing was produced.
            print(f"✗ No segments were processed successfully for {movie_name}")
            return False
        processing_state["processed_files"].append(movie_name)
        processing_state["total_processed"] += 1
        if uploaded == len(segment_files):
            print(f"\n✓ Successfully processed all segments for {movie_name}")
        else:
            print(f"\n✓ Processed {uploaded} of {len(segment_files)} segments for {movie_name}")
        return True
    except Exception as e:
        record_error(str(e))
        print(f"✗ Error: {e}")
        return False
async def scan_and_process_videos():
    """Scan hooks folder and process all movies.

    Lists the dataset repo, derives the set of movie folder names from the
    ``hooks/<movie>/...json`` paths, and processes each movie in sorted
    order with a 2 second pause between movies.  Does nothing if a pass is
    already running.  ``processing_state["is_running"]`` is set for the
    duration of the pass and cleared afterwards, together with
    ``current_file``.
    """
    if processing_state["is_running"]:
        print("Video processing already running, skipping...")
        return
    processing_state["is_running"] = True
    print("\n" + "="*80)
    print("STARTING VIDEO PROCESSING SERVICE")
    print("="*80)
    try:
        # The hub listing is a blocking network call; run it in the default
        # executor so the event loop keeps serving requests.
        loop = asyncio.get_running_loop()
        files = await loop.run_in_executor(
            None,
            lambda: list_repo_files(
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN
            )
        )
        # Find all movie folders in hooks/
        hooks_prefix = f"{HOOKS_FOLDER}/"
        movie_folders = set()
        for f in files:
            if not (f.startswith(hooks_prefix) and f.endswith(".json")):
                continue
            parts = f.split("/")
            # Require hooks/<movie>/<file>.json: a JSON file sitting directly
            # in hooks/ has no movie folder and must not be taken for one.
            if len(parts) >= 3 and parts[1]:
                movie_folders.add(parts[1])
        print(f"Found {len(movie_folders)} movies to process")
        for movie_name in sorted(movie_folders):
            await process_movie_segments(movie_name)
            await asyncio.sleep(2)
        print("\n" + "="*80)
        print("VIDEO PROCESSING COMPLETE")
        print(f"Processed: {processing_state['total_processed']}")
        print(f"Errors: {processing_state['error_count']}")
        print("="*80 + "\n")
    except Exception as e:
        print(f"Critical error: {e}")
        processing_state["last_error"] = str(e)
    finally:
        processing_state["is_running"] = False
        # Nothing is being processed once the pass has ended.
        processing_state["current_file"] = None
@app.on_event("startup")
async def startup_event():
    """Start video processing on server startup."""
    # The event loop only holds weak references to tasks, so keep a strong
    # reference on the app; otherwise the task may be garbage-collected
    # before it finishes.
    app.state.processing_task = asyncio.create_task(scan_and_process_videos())
@app.get("/")
async def health():
    """Report that the service is up, together with the processing counters."""
    payload = {
        "status": "running",
        "service": "Video Processing Service",
        "is_processing": processing_state["is_running"],
    }
    # Remaining fields are copied straight from the shared state.
    for field in ("total_processed", "error_count", "current_file",
                  "last_error", "processed_files"):
        payload[field] = processing_state[field]
    return JSONResponse(payload)
@app.get("/status")
async def get_status():
    """Return a snapshot of the shared processing state as JSON."""
    reported_fields = (
        "is_running",
        "total_processed",
        "error_count",
        "current_file",
        "last_error",
        "processed_files",
    )
    snapshot = {name: processing_state[name] for name in reported_fields}
    return JSONResponse(snapshot)
@app.post("/trigger-processing")
async def trigger_processing():
    """Manually trigger video processing.

    Returns an ``already_running`` status if a pass is in progress;
    otherwise schedules a new pass in the background and returns
    ``started`` immediately.
    """
    if processing_state["is_running"]:
        return JSONResponse({
            "status": "already_running",
            "message": "Video processing is already in progress"
        })
    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task may be garbage-collected mid-run.
    app.state.processing_task = asyncio.create_task(scan_and_process_videos())
    return JSONResponse({
        "status": "started",
        "message": "Video processing scan started"
    })
if __name__ == "__main__":
    # Single source of truth for the port, so the log line cannot drift
    # from the port the server actually binds to.
    port = 7860
    print(f"Starting Video Processing Service on port {port}...")
    print("Will automatically scan and process videos on startup")
    uvicorn.run(app, host="0.0.0.0", port=port)