Spaces:
Running
Running
Update server.py
Browse files
server.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
import os
|
| 3 |
import json
|
|
@@ -41,13 +42,12 @@ processing_state = {
|
|
| 41 |
"current_file": None,
|
| 42 |
"error_count": 0,
|
| 43 |
"last_error": None,
|
| 44 |
-
"processed_files": []
|
|
|
|
| 45 |
}
|
| 46 |
|
| 47 |
-
#
|
| 48 |
-
|
| 49 |
-
whisper_model = WhisperModel("small", device="auto", compute_type="int8")
|
| 50 |
-
print("β Whisper model loaded")
|
| 51 |
|
| 52 |
HF_DATASET_REPO = "factorstudios/movs"
|
| 53 |
HOOKS_FOLDER = "hooks"
|
|
@@ -55,6 +55,15 @@ READY_VIDEOS_FOLDER = "ready_videos"
|
|
| 55 |
TRANSCRIPTION_FOLDER = "transcriptions"
|
| 56 |
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def timestamp_to_seconds(timestamp: str) -> float:
|
| 59 |
"""Convert HH:MM:SS to seconds."""
|
| 60 |
try:
|
|
@@ -72,10 +81,10 @@ def extract_audio_segment(video_path: str, start_seconds: float, end_seconds: fl
|
|
| 72 |
"-ss", str(start_seconds),
|
| 73 |
"-to", str(end_seconds),
|
| 74 |
"-i", video_path,
|
| 75 |
-
"-vn",
|
| 76 |
-
"-acodec", "pcm_s16le",
|
| 77 |
-
"-ar", "16000",
|
| 78 |
-
"-ac", "1",
|
| 79 |
output_wav
|
| 80 |
]
|
| 81 |
result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
@@ -85,14 +94,14 @@ def extract_audio_segment(video_path: str, start_seconds: float, end_seconds: fl
|
|
| 85 |
def transcribe_segment(audio_path: str) -> List[Tuple[float, float, str]]:
|
| 86 |
"""
|
| 87 |
Transcribe audio with Whisper small.
|
| 88 |
-
Returns list of (start_sec, end_sec, text)
|
| 89 |
"""
|
| 90 |
print(" Transcribing audio with Whisper small...")
|
| 91 |
segments, info = whisper_model.transcribe(
|
| 92 |
audio_path,
|
| 93 |
beam_size=5,
|
| 94 |
-
language=None,
|
| 95 |
-
vad_filter=True,
|
| 96 |
vad_parameters=dict(min_silence_duration_ms=500)
|
| 97 |
)
|
| 98 |
|
|
@@ -112,33 +121,27 @@ def apply_color_grading_wedding_retro(frame: np.ndarray) -> np.ndarray:
|
|
| 112 |
lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
|
| 113 |
l_channel, a_channel, b_channel = cv2.split(lab)
|
| 114 |
|
| 115 |
-
# 1. VINTAGE/RETRO: warm tones
|
| 116 |
a_channel = cv2.add(a_channel, 5)
|
| 117 |
b_channel = cv2.add(b_channel, 8)
|
| 118 |
|
| 119 |
-
# 2. WEDDING LOOK: soft highlights via CLAHE
|
| 120 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
| 121 |
l_channel = clahe.apply(l_channel)
|
| 122 |
|
| 123 |
lab_enhanced = cv2.merge([l_channel, a_channel, b_channel])
|
| 124 |
frame = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
|
| 125 |
|
| 126 |
-
# 3. SATURATION BOOST
|
| 127 |
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV).astype(np.float32)
|
| 128 |
hsv[:, :, 1] = np.clip(hsv[:, :, 1] * 1.3, 0, 255)
|
| 129 |
frame = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
|
| 130 |
|
| 131 |
-
# 4. CONTRAST ENHANCEMENT
|
| 132 |
frame = cv2.convertScaleAbs(frame, alpha=1.15, beta=10)
|
| 133 |
|
| 134 |
-
# 5. HIGH SHARPENING
|
| 135 |
kernel = np.array([[-1, -1, -1],
|
| 136 |
[-1, 9, -1],
|
| 137 |
[-1, -1, -1]]) / 1.2
|
| 138 |
sharpened = cv2.filter2D(frame, -1, kernel)
|
| 139 |
frame = cv2.addWeighted(frame, 0.4, sharpened, 0.6, 0)
|
| 140 |
|
| 141 |
-
# 6. SLIGHT VIGNETTE
|
| 142 |
rows, cols = frame.shape[:2]
|
| 143 |
X_kernel = cv2.getGaussianKernel(cols, cols / 2)
|
| 144 |
Y_kernel = cv2.getGaussianKernel(rows, rows / 2)
|
|
@@ -164,7 +167,6 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 36) ->
|
|
| 164 |
except Exception:
|
| 165 |
font = ImageFont.load_default()
|
| 166 |
|
| 167 |
-
# Word-wrap text
|
| 168 |
max_width = width - 80
|
| 169 |
wrapped_lines = []
|
| 170 |
words = text.split()
|
|
@@ -184,8 +186,6 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 36) ->
|
|
| 184 |
|
| 185 |
line_height = font_size + 12
|
| 186 |
total_text_height = len(wrapped_lines) * line_height
|
| 187 |
-
|
| 188 |
-
# Position: 80% down the frame (near bottom, not center)
|
| 189 |
y_start = int(height * 0.80) - total_text_height // 2
|
| 190 |
shadow_offset = 3
|
| 191 |
|
|
@@ -195,9 +195,7 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 36) ->
|
|
| 195 |
x = (width - line_width) // 2
|
| 196 |
y = y_start + i * line_height
|
| 197 |
|
| 198 |
-
# Shadow layer
|
| 199 |
draw.text((x + shadow_offset, y + shadow_offset), line, font=font, fill=(0, 0, 0, 200))
|
| 200 |
-
# Main white text
|
| 201 |
draw.text((x, y), line, font=font, fill=(255, 255, 255, 255))
|
| 202 |
|
| 203 |
frame_pil = Image.alpha_composite(frame_pil, overlay).convert('RGB')
|
|
@@ -205,10 +203,7 @@ def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 36) ->
|
|
| 205 |
|
| 206 |
|
| 207 |
def build_frame_caption_map(captions: List[Tuple[float, float, str]], fps: float) -> Dict[int, str]:
|
| 208 |
-
"""
|
| 209 |
-
Convert Whisper (start, end, text) segments into a per-frame caption map.
|
| 210 |
-
Each frame number maps to the caption active at that time.
|
| 211 |
-
"""
|
| 212 |
frame_map = {}
|
| 213 |
for start_sec, end_sec, text in captions:
|
| 214 |
start_frame = int(start_sec * fps)
|
|
@@ -234,7 +229,7 @@ def process_video_segment(
|
|
| 234 |
4. Mux processed video with original audio
|
| 235 |
"""
|
| 236 |
ffmpeg_video_proc = None
|
| 237 |
-
temp_wav =
|
| 238 |
temp_video_path = output_path.replace(".mp4", "_noaudio.mp4")
|
| 239 |
|
| 240 |
try:
|
|
@@ -256,18 +251,17 @@ def process_video_segment(
|
|
| 256 |
print(f"Video info: {fps} fps, {original_width}x{original_height}")
|
| 257 |
print(f"Extracting segment: {start_time} to {end_time} ({duration:.1f}s)")
|
| 258 |
|
| 259 |
-
# ββ Step 1: Extract audio
|
| 260 |
-
temp_wav = output_path.replace(".mp4", "_audio.wav")
|
| 261 |
print(" Extracting audio segment...")
|
| 262 |
audio_ok = extract_audio_segment(video_path, start_seconds, end_seconds, temp_wav)
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
else:
|
| 267 |
-
# ββ Step 2: Transcribe with Whisper βββββββββββββββββββββββββββββββ
|
| 268 |
captions = transcribe_segment(temp_wav)
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
-
# Build per-frame caption lookup from Whisper timestamps
|
| 271 |
frame_caption_map = build_frame_caption_map(captions, fps)
|
| 272 |
|
| 273 |
# ββ Step 3: Process frames β pipe to FFmpeg βββββββββββββββββββββββββββ
|
|
@@ -308,7 +302,6 @@ def process_video_segment(
|
|
| 308 |
print(f"Warning: Could not read frame at position {processed_frames}")
|
| 309 |
break
|
| 310 |
|
| 311 |
-
# Crop to target aspect ratio
|
| 312 |
aspect_ratio = target_width / target_height
|
| 313 |
if original_width / original_height > aspect_ratio:
|
| 314 |
new_width = int(original_height * aspect_ratio)
|
|
@@ -322,12 +315,10 @@ def process_video_segment(
|
|
| 322 |
frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_LANCZOS4)
|
| 323 |
frame = apply_color_grading_wedding_retro(frame)
|
| 324 |
|
| 325 |
-
|
| 326 |
-
if
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
# Clear caption when we're past its end frame
|
| 330 |
-
current_caption = frame_caption_map.get(processed_frames, "")
|
| 331 |
|
| 332 |
if current_caption:
|
| 333 |
frame = burn_captions_to_frame(frame, current_caption)
|
|
@@ -376,7 +367,7 @@ def process_video_segment(
|
|
| 376 |
print(f"β FFmpeg audio mux failed (code {mux_result.returncode})")
|
| 377 |
return False
|
| 378 |
|
| 379 |
-
print(f"β
|
| 380 |
return True
|
| 381 |
|
| 382 |
except Exception as e:
|
|
@@ -390,10 +381,12 @@ def process_video_segment(
|
|
| 390 |
return False
|
| 391 |
|
| 392 |
finally:
|
| 393 |
-
# Clean up temp files
|
| 394 |
for tmp in [temp_video_path, temp_wav]:
|
| 395 |
if tmp and os.path.exists(tmp):
|
| 396 |
-
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
|
| 399 |
async def process_movie_segments(movie_name: str) -> bool:
|
|
@@ -404,7 +397,6 @@ async def process_movie_segments(movie_name: str) -> bool:
|
|
| 404 |
print(f"Processing movie: {movie_name}")
|
| 405 |
print(f"{'='*80}")
|
| 406 |
|
| 407 |
-
# Download original video
|
| 408 |
video_file = f"{movie_name}.mkv"
|
| 409 |
print(f"Downloading video: {video_file}")
|
| 410 |
|
|
@@ -422,7 +414,6 @@ async def process_movie_segments(movie_name: str) -> bool:
|
|
| 422 |
print(f"Error: Could not download video: {e}")
|
| 423 |
return False
|
| 424 |
|
| 425 |
-
# List segment JSON files
|
| 426 |
hooks_folder = f"{HOOKS_FOLDER}/{movie_name}"
|
| 427 |
print(f"Listing segments from: {hooks_folder}")
|
| 428 |
|
|
@@ -442,7 +433,6 @@ async def process_movie_segments(movie_name: str) -> bool:
|
|
| 442 |
return False
|
| 443 |
|
| 444 |
print(f"Found {len(segment_files)} segments")
|
| 445 |
-
|
| 446 |
temp_dir = tempfile.mkdtemp()
|
| 447 |
|
| 448 |
try:
|
|
@@ -519,6 +509,7 @@ async def scan_and_process_videos():
|
|
| 519 |
print("Video processing already running, skipping...")
|
| 520 |
return
|
| 521 |
|
|
|
|
| 522 |
print("Waiting 3 minutes before starting video processing...")
|
| 523 |
await asyncio.sleep(180)
|
| 524 |
|
|
@@ -562,6 +553,11 @@ async def scan_and_process_videos():
|
|
| 562 |
|
| 563 |
@app.on_event("startup")
|
| 564 |
async def startup_event():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
asyncio.create_task(scan_and_process_videos())
|
| 566 |
|
| 567 |
|
|
@@ -570,6 +566,7 @@ async def health():
|
|
| 570 |
return JSONResponse({
|
| 571 |
"status": "running",
|
| 572 |
"service": "Video Processing Service",
|
|
|
|
| 573 |
"is_processing": processing_state["is_running"],
|
| 574 |
"total_processed": processing_state["total_processed"],
|
| 575 |
"error_count": processing_state["error_count"],
|
|
@@ -582,6 +579,7 @@ async def health():
|
|
| 582 |
@app.get("/status")
|
| 583 |
async def get_status():
|
| 584 |
return JSONResponse({
|
|
|
|
| 585 |
"is_running": processing_state["is_running"],
|
| 586 |
"total_processed": processing_state["total_processed"],
|
| 587 |
"error_count": processing_state["error_count"],
|
|
@@ -598,6 +596,11 @@ async def trigger_processing():
|
|
| 598 |
"status": "already_running",
|
| 599 |
"message": "Video processing is already in progress"
|
| 600 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 601 |
asyncio.create_task(scan_and_process_videos())
|
| 602 |
return JSONResponse({
|
| 603 |
"status": "started",
|
|
@@ -607,5 +610,5 @@ async def trigger_processing():
|
|
| 607 |
|
| 608 |
if __name__ == "__main__":
|
| 609 |
print("Starting Video Processing Service on port 7860...")
|
| 610 |
-
print("
|
| 611 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
+
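# NOTE(review): a stray shell heredoc terminator (ENDOFFILE') was committed on this line, ahead of the shebang; as bare text it is a Python syntax error, so it is kept only as this comment and should be deleted.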
|
| 2 |
#!/usr/bin/env python3
|
| 3 |
import os
|
| 4 |
import json
|
|
|
|
| 42 |
"current_file": None,
|
| 43 |
"error_count": 0,
|
| 44 |
"last_error": None,
|
| 45 |
+
"processed_files": [],
|
| 46 |
+
"whisper_ready": False
|
| 47 |
}
|
| 48 |
|
| 49 |
+
# Whisper model — loaded asynchronously at startup, not at import time
|
| 50 |
+
whisper_model = None
|
|
|
|
|
|
|
| 51 |
|
| 52 |
HF_DATASET_REPO = "factorstudios/movs"
|
| 53 |
HOOKS_FOLDER = "hooks"
|
|
|
|
| 55 |
TRANSCRIPTION_FOLDER = "transcriptions"
|
| 56 |
|
| 57 |
|
| 58 |
+
def _load_whisper_model():
    """Build the Whisper "small" model and publish it in the module globals.

    This call blocks until the model is constructed; the startup hook
    runs it through ``loop.run_in_executor`` for that reason. On success
    it rebinds the global ``whisper_model`` and sets
    ``processing_state["whisper_ready"]`` to True.
    """
    global whisper_model
    print("Loading Whisper small model...")
    model = WhisperModel("small", device="auto", compute_type="int8")
    # Bind the global first, then raise the flag, so a reader that sees
    # whisper_ready == True also sees a non-None model.
    whisper_model = model
    processing_state.update(whisper_ready=True)
    print("β Whisper model loaded")
|
| 65 |
+
|
| 66 |
+
|
| 67 |
def timestamp_to_seconds(timestamp: str) -> float:
|
| 68 |
"""Convert HH:MM:SS to seconds."""
|
| 69 |
try:
|
|
|
|
| 81 |
"-ss", str(start_seconds),
|
| 82 |
"-to", str(end_seconds),
|
| 83 |
"-i", video_path,
|
| 84 |
+
"-vn",
|
| 85 |
+
"-acodec", "pcm_s16le",
|
| 86 |
+
"-ar", "16000",
|
| 87 |
+
"-ac", "1",
|
| 88 |
output_wav
|
| 89 |
]
|
| 90 |
result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
|
|
| 94 |
def transcribe_segment(audio_path: str) -> List[Tuple[float, float, str]]:
|
| 95 |
"""
|
| 96 |
Transcribe audio with Whisper small.
|
| 97 |
+
Returns list of (start_sec, end_sec, text) relative to segment start.
|
| 98 |
"""
|
| 99 |
print(" Transcribing audio with Whisper small...")
|
| 100 |
segments, info = whisper_model.transcribe(
|
| 101 |
audio_path,
|
| 102 |
beam_size=5,
|
| 103 |
+
language=None,
|
| 104 |
+
vad_filter=True,
|
| 105 |
vad_parameters=dict(min_silence_duration_ms=500)
|
| 106 |
)
|
| 107 |
|
|
|
|
| 121 |
lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
|
| 122 |
l_channel, a_channel, b_channel = cv2.split(lab)
|
| 123 |
|
|
|
|
| 124 |
a_channel = cv2.add(a_channel, 5)
|
| 125 |
b_channel = cv2.add(b_channel, 8)
|
| 126 |
|
|
|
|
| 127 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
| 128 |
l_channel = clahe.apply(l_channel)
|
| 129 |
|
| 130 |
lab_enhanced = cv2.merge([l_channel, a_channel, b_channel])
|
| 131 |
frame = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
|
| 132 |
|
|
|
|
| 133 |
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV).astype(np.float32)
|
| 134 |
hsv[:, :, 1] = np.clip(hsv[:, :, 1] * 1.3, 0, 255)
|
| 135 |
frame = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
|
| 136 |
|
|
|
|
| 137 |
frame = cv2.convertScaleAbs(frame, alpha=1.15, beta=10)
|
| 138 |
|
|
|
|
| 139 |
kernel = np.array([[-1, -1, -1],
|
| 140 |
[-1, 9, -1],
|
| 141 |
[-1, -1, -1]]) / 1.2
|
| 142 |
sharpened = cv2.filter2D(frame, -1, kernel)
|
| 143 |
frame = cv2.addWeighted(frame, 0.4, sharpened, 0.6, 0)
|
| 144 |
|
|
|
|
| 145 |
rows, cols = frame.shape[:2]
|
| 146 |
X_kernel = cv2.getGaussianKernel(cols, cols / 2)
|
| 147 |
Y_kernel = cv2.getGaussianKernel(rows, rows / 2)
|
|
|
|
| 167 |
except Exception:
|
| 168 |
font = ImageFont.load_default()
|
| 169 |
|
|
|
|
| 170 |
max_width = width - 80
|
| 171 |
wrapped_lines = []
|
| 172 |
words = text.split()
|
|
|
|
| 186 |
|
| 187 |
line_height = font_size + 12
|
| 188 |
total_text_height = len(wrapped_lines) * line_height
|
|
|
|
|
|
|
| 189 |
y_start = int(height * 0.80) - total_text_height // 2
|
| 190 |
shadow_offset = 3
|
| 191 |
|
|
|
|
| 195 |
x = (width - line_width) // 2
|
| 196 |
y = y_start + i * line_height
|
| 197 |
|
|
|
|
| 198 |
draw.text((x + shadow_offset, y + shadow_offset), line, font=font, fill=(0, 0, 0, 200))
|
|
|
|
| 199 |
draw.text((x, y), line, font=font, fill=(255, 255, 255, 255))
|
| 200 |
|
| 201 |
frame_pil = Image.alpha_composite(frame_pil, overlay).convert('RGB')
|
|
|
|
| 203 |
|
| 204 |
|
| 205 |
def build_frame_caption_map(captions: List[Tuple[float, float, str]], fps: float) -> Dict[int, str]:
|
| 206 |
+
"""Convert Whisper segments into a per-frame caption lookup."""
|
|
|
|
|
|
|
|
|
|
| 207 |
frame_map = {}
|
| 208 |
for start_sec, end_sec, text in captions:
|
| 209 |
start_frame = int(start_sec * fps)
|
|
|
|
| 229 |
4. Mux processed video with original audio
|
| 230 |
"""
|
| 231 |
ffmpeg_video_proc = None
|
| 232 |
+
temp_wav = output_path.replace(".mp4", "_audio.wav")
|
| 233 |
temp_video_path = output_path.replace(".mp4", "_noaudio.mp4")
|
| 234 |
|
| 235 |
try:
|
|
|
|
| 251 |
print(f"Video info: {fps} fps, {original_width}x{original_height}")
|
| 252 |
print(f"Extracting segment: {start_time} to {end_time} ({duration:.1f}s)")
|
| 253 |
|
| 254 |
+
# ── Step 1: Extract audio → WAV ───────────────────────────────────────
|
|
|
|
| 255 |
print(" Extracting audio segment...")
|
| 256 |
audio_ok = extract_audio_segment(video_path, start_seconds, end_seconds, temp_wav)
|
| 257 |
+
|
| 258 |
+
# ── Step 2: Transcribe with Whisper ───────────────────────────────────
|
| 259 |
+
if audio_ok and whisper_model is not None:
|
|
|
|
|
|
|
| 260 |
captions = transcribe_segment(temp_wav)
|
| 261 |
+
else:
|
| 262 |
+
print(" Warning: Skipping transcription (audio failed or model not ready)")
|
| 263 |
+
captions = []
|
| 264 |
|
|
|
|
| 265 |
frame_caption_map = build_frame_caption_map(captions, fps)
|
| 266 |
|
| 267 |
# ── Step 3: Process frames → pipe to FFmpeg ───────────────────────────
|
|
|
|
| 302 |
print(f"Warning: Could not read frame at position {processed_frames}")
|
| 303 |
break
|
| 304 |
|
|
|
|
| 305 |
aspect_ratio = target_width / target_height
|
| 306 |
if original_width / original_height > aspect_ratio:
|
| 307 |
new_width = int(original_height * aspect_ratio)
|
|
|
|
| 315 |
frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_LANCZOS4)
|
| 316 |
frame = apply_color_grading_wedding_retro(frame)
|
| 317 |
|
| 318 |
+
current_caption = frame_caption_map.get(processed_frames, current_caption)
|
| 319 |
+
# Clear caption if this frame isn't in the map and the previous caption has ended
|
| 320 |
+
if processed_frames not in frame_caption_map:
|
| 321 |
+
current_caption = ""
|
|
|
|
|
|
|
| 322 |
|
| 323 |
if current_caption:
|
| 324 |
frame = burn_captions_to_frame(frame, current_caption)
|
|
|
|
| 367 |
print(f"β FFmpeg audio mux failed (code {mux_result.returncode})")
|
| 368 |
return False
|
| 369 |
|
| 370 |
+
print(f"β Segment complete: {output_path}")
|
| 371 |
return True
|
| 372 |
|
| 373 |
except Exception as e:
|
|
|
|
| 381 |
return False
|
| 382 |
|
| 383 |
finally:
|
|
|
|
| 384 |
for tmp in [temp_video_path, temp_wav]:
|
| 385 |
if tmp and os.path.exists(tmp):
|
| 386 |
+
try:
|
| 387 |
+
os.remove(tmp)
|
| 388 |
+
except Exception:
|
| 389 |
+
pass
|
| 390 |
|
| 391 |
|
| 392 |
async def process_movie_segments(movie_name: str) -> bool:
|
|
|
|
| 397 |
print(f"Processing movie: {movie_name}")
|
| 398 |
print(f"{'='*80}")
|
| 399 |
|
|
|
|
| 400 |
video_file = f"{movie_name}.mkv"
|
| 401 |
print(f"Downloading video: {video_file}")
|
| 402 |
|
|
|
|
| 414 |
print(f"Error: Could not download video: {e}")
|
| 415 |
return False
|
| 416 |
|
|
|
|
| 417 |
hooks_folder = f"{HOOKS_FOLDER}/{movie_name}"
|
| 418 |
print(f"Listing segments from: {hooks_folder}")
|
| 419 |
|
|
|
|
| 433 |
return False
|
| 434 |
|
| 435 |
print(f"Found {len(segment_files)} segments")
|
|
|
|
| 436 |
temp_dir = tempfile.mkdtemp()
|
| 437 |
|
| 438 |
try:
|
|
|
|
| 509 |
print("Video processing already running, skipping...")
|
| 510 |
return
|
| 511 |
|
| 512 |
+
# Wait 3 minutes for Space to fully initialize
|
| 513 |
print("Waiting 3 minutes before starting video processing...")
|
| 514 |
await asyncio.sleep(180)
|
| 515 |
|
|
|
|
| 553 |
|
| 554 |
@app.on_event("startup")
async def startup_event():
    """Schedule the Whisper load and the first processing run in the background.

    The handler returns immediately. Awaiting the model load here would
    hold the startup phase open until the load finished, so no endpoint
    (including the health check) could answer in the meantime and
    ``whisper_ready`` would never be observed as False by a client.

    The background task keeps the original ordering: load the model
    first, then call ``scan_and_process_videos()`` (which applies its own
    3-minute delay).
    """
    loop = asyncio.get_running_loop()

    async def _load_model_then_process():
        try:
            # The model constructor blocks, so run it on the default
            # thread pool instead of on the event loop.
            await loop.run_in_executor(None, _load_whisper_model)
        except Exception as e:
            # Report the failure instead of losing it inside an
            # un-awaited task. whisper_model stays None and
            # whisper_ready stays False; process_video_segment skips
            # transcription when the model is None.
            print(f"Error: Could not load Whisper model: {e}")
        await scan_and_process_videos()

    asyncio.create_task(_load_model_then_process())
|
| 562 |
|
| 563 |
|
|
|
|
| 566 |
return JSONResponse({
|
| 567 |
"status": "running",
|
| 568 |
"service": "Video Processing Service",
|
| 569 |
+
"whisper_ready": processing_state["whisper_ready"],
|
| 570 |
"is_processing": processing_state["is_running"],
|
| 571 |
"total_processed": processing_state["total_processed"],
|
| 572 |
"error_count": processing_state["error_count"],
|
|
|
|
| 579 |
@app.get("/status")
|
| 580 |
async def get_status():
|
| 581 |
return JSONResponse({
|
| 582 |
+
"whisper_ready": processing_state["whisper_ready"],
|
| 583 |
"is_running": processing_state["is_running"],
|
| 584 |
"total_processed": processing_state["total_processed"],
|
| 585 |
"error_count": processing_state["error_count"],
|
|
|
|
| 596 |
"status": "already_running",
|
| 597 |
"message": "Video processing is already in progress"
|
| 598 |
})
|
| 599 |
+
if not processing_state["whisper_ready"]:
|
| 600 |
+
return JSONResponse({
|
| 601 |
+
"status": "not_ready",
|
| 602 |
+
"message": "Whisper model is still loading, try again shortly"
|
| 603 |
+
})
|
| 604 |
asyncio.create_task(scan_and_process_videos())
|
| 605 |
return JSONResponse({
|
| 606 |
"status": "started",
|
|
|
|
| 610 |
|
| 611 |
if __name__ == "__main__":
    # Print the two startup banner lines, then hand control to uvicorn,
    # listening on all interfaces on port 7860.
    for banner_line in (
        "Starting Video Processing Service on port 7860...",
        "Whisper will load at startup, processing begins 3 minutes after",
    ):
        print(banner_line)
    uvicorn.run(app, host="0.0.0.0", port=7860)
|