factorstudios committed on
Commit
99a0700
·
verified ·
1 Parent(s): a70e27b

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +156 -538
server.py CHANGED
@@ -1,16 +1,17 @@
1
  #!/usr/bin/env python3
2
  import os
3
  import json
4
- import re
5
  import asyncio
6
  import tempfile
7
  import subprocess
 
 
8
  from pathlib import Path
9
  from datetime import datetime
10
  from dotenv import load_dotenv
11
  from typing import List, Dict, Optional, Tuple
12
 
13
- from fastapi import FastAPI, HTTPException
14
  from fastapi.responses import JSONResponse
15
  import uvicorn
16
 
@@ -22,15 +23,14 @@ try:
22
  from faster_whisper import WhisperModel
23
  except ImportError as e:
24
  print(f"Missing dependency: {e}")
25
- print("Install with: pip install faster-whisper")
26
  exit(1)
27
 
28
  # Load environment variables
29
  load_dotenv()
30
  HF_TOKEN = os.getenv("HF_TOKEN")
31
- if not HF_TOKEN:
32
- print("Error: Missing HF_TOKEN in .env")
33
- exit(1)
34
 
35
  app = FastAPI(title="Video Processing Service")
36
 
@@ -42,581 +42,199 @@ processing_state = {
42
  "error_count": 0,
43
  "last_error": None,
44
  "processed_files": [],
45
- "whisper_ready": False
 
46
  }
47
 
48
- # Whisper model — loaded async at startup, not at import time
49
  whisper_model = None
50
 
51
- HF_DATASET_REPO = "factorstudios/movs"
52
- HOOKS_FOLDER = "hooks"
53
- READY_VIDEOS_FOLDER = "ready_videos"
54
- TRANSCRIPTION_FOLDER = "transcriptions"
55
-
 
 
 
 
 
56
 
57
  def _load_whisper_model():
58
- """Blocking model load runs in thread executor."""
59
  global whisper_model
60
- print("Loading Whisper small model...")
61
- whisper_model = WhisperModel("small", device="auto", compute_type="int8")
62
- processing_state["whisper_ready"] = True
63
- print(" Whisper model loaded")
64
-
 
 
65
 
66
  def timestamp_to_seconds(timestamp: str) -> float:
67
- """Convert HH:MM:SS to seconds."""
68
  try:
69
  parts = timestamp.split(":")
70
- return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
71
- except Exception as e:
72
- print(f"Error converting timestamp {timestamp}: {e}")
 
73
  return 0.0
74
 
75
-
76
- def extract_audio_segment(video_path: str, start_seconds: float, end_seconds: float, output_wav: str) -> bool:
77
- """Extract audio segment from video as WAV for Whisper."""
78
- cmd = [
79
- "ffmpeg", "-y",
80
- "-ss", str(start_seconds),
81
- "-to", str(end_seconds),
82
- "-i", video_path,
83
- "-vn",
84
- "-acodec", "pcm_s16le",
85
- "-ar", "16000",
86
- "-ac", "1",
87
- output_wav
88
- ]
89
- result = subprocess.run(cmd, capture_output=True, text=True)
90
- if result.returncode != 0:
91
- print(f" ✗ FFmpeg audio extraction failed: {result.stderr}")
92
- return False
93
- if not os.path.exists(output_wav):
94
- print(f" ✗ Output WAV file not created: {output_wav}")
95
- return False
96
- print(f" ✓ Audio extracted successfully")
97
- return True
98
-
99
-
100
- def transcribe_segment(audio_path: str) -> List[Tuple[float, float, str]]:
101
- """
102
- Transcribe audio with Whisper small.
103
- Returns list of (start_sec, end_sec, text) relative to segment start.
104
- """
105
- print(" Transcribing audio with Whisper small...")
106
- segments, info = whisper_model.transcribe(
107
- audio_path,
108
- beam_size=5,
109
- language=None,
110
- vad_filter=True,
111
- vad_parameters=dict(min_silence_duration_ms=500)
112
- )
113
-
114
- captions = []
115
- for seg in segments:
116
- text = seg.text.strip()
117
- if text:
118
- captions.append((seg.start, seg.end, text))
119
- print(f" [{seg.start:.1f}s → {seg.end:.1f}s] {text}")
120
-
121
- print(f" ✓ Transcribed {len(captions)} caption segments")
122
- return captions
123
-
124
-
125
- def apply_color_grading_wedding_retro(frame: np.ndarray) -> np.ndarray:
126
- """Apply cinematic wedding LUT + retro style with high sharpening."""
127
  lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
128
- l_channel, a_channel, b_channel = cv2.split(lab)
129
-
130
- a_channel = cv2.add(a_channel, 5)
131
- b_channel = cv2.add(b_channel, 8)
132
-
133
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
134
- l_channel = clahe.apply(l_channel)
135
-
136
- lab_enhanced = cv2.merge([l_channel, a_channel, b_channel])
137
- frame = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
138
-
139
- hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV).astype(np.float32)
140
- hsv[:, :, 1] = np.clip(hsv[:, :, 1] * 1.3, 0, 255)
141
- frame = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)
142
-
143
- frame = cv2.convertScaleAbs(frame, alpha=1.15, beta=10)
144
-
145
- kernel = np.array([[-1, -1, -1],
146
- [-1, 9, -1],
147
- [-1, -1, -1]]) / 1.2
148
  sharpened = cv2.filter2D(frame, -1, kernel)
149
- frame = cv2.addWeighted(frame, 0.4, sharpened, 0.6, 0)
150
-
151
- rows, cols = frame.shape[:2]
152
- X_kernel = cv2.getGaussianKernel(cols, cols / 2)
153
- Y_kernel = cv2.getGaussianKernel(rows, rows / 2)
154
- mask = (Y_kernel * X_kernel.T)
155
- mask = (mask / mask.max()) ** 0.4
156
-
157
- for i in range(3):
158
- frame[:, :, i] = frame[:, :, i] * mask
159
-
160
- return np.clip(frame, 0, 255).astype(np.uint8)
161
-
162
-
163
- def burn_captions_to_frame(frame: np.ndarray, text: str, font_size: int = 36) -> np.ndarray:
164
- """Burn caption text onto frame — shadow only, no background, positioned near bottom."""
165
- height, width = frame.shape[:2]
166
-
167
- frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).convert('RGBA')
168
- overlay = Image.new('RGBA', frame_pil.size, (0, 0, 0, 0))
169
- draw = ImageDraw.Draw(overlay)
170
 
 
 
 
 
171
  try:
172
  font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", font_size)
173
- except Exception:
174
  font = ImageFont.load_default()
175
-
176
- max_width = width - 80
177
- wrapped_lines = []
178
- words = text.split()
179
- current_line = []
180
-
181
- for word in words:
182
- test_line = ' '.join(current_line + [word])
183
- bbox = draw.textbbox((0, 0), test_line, font=font)
184
- if bbox[2] - bbox[0] > max_width:
185
- if current_line:
186
- wrapped_lines.append(' '.join(current_line))
187
- current_line = [word]
188
  else:
189
- current_line.append(word)
190
- if current_line:
191
- wrapped_lines.append(' '.join(current_line))
192
-
193
- line_height = font_size + 12
194
- total_text_height = len(wrapped_lines) * line_height
195
- y_start = int(height * 0.80) - total_text_height // 2
196
- shadow_offset = 3
197
-
198
- for i, line in enumerate(wrapped_lines):
199
  bbox = draw.textbbox((0, 0), line, font=font)
200
- line_width = bbox[2] - bbox[0]
201
- x = (width - line_width) // 2
202
- y = y_start + i * line_height
203
-
204
- draw.text((x + shadow_offset, y + shadow_offset), line, font=font, fill=(0, 0, 0, 200))
205
- draw.text((x, y), line, font=font, fill=(255, 255, 255, 255))
206
-
207
- frame_pil = Image.alpha_composite(frame_pil, overlay).convert('RGB')
208
- return cv2.cvtColor(np.array(frame_pil), cv2.COLOR_RGB2BGR)
209
-
210
-
211
- def build_frame_caption_map(captions: List[Tuple[float, float, str]], fps: float) -> Dict[int, str]:
212
- """Convert Whisper segments into a per-frame caption lookup."""
213
- frame_map = {}
214
- for start_sec, end_sec, text in captions:
215
- start_frame = int(start_sec * fps)
216
- end_frame = int(end_sec * fps)
217
- for f in range(start_frame, end_frame + 1):
218
- frame_map[f] = text
219
- return frame_map
220
-
221
-
222
- def process_video_segment(
223
- video_path: str,
224
- output_path: str,
225
- start_time: str,
226
- end_time: str,
227
- target_width: int = 1080,
228
- target_height: int = 1350
229
- ) -> bool:
230
- """
231
- Full pipeline:
232
- 1. Extract audio segment → WAV
233
- 2. Transcribe with Whisper small
234
- 3. Process frames with color grading + caption burn-in
235
- 4. Mux processed video with original audio
236
- """
237
- ffmpeg_video_proc = None
238
- temp_wav = output_path.replace(".mp4", "_audio.wav")
239
- temp_video_path = output_path.replace(".mp4", "_noaudio.mp4")
240
-
241
  try:
242
- print(f"Opening video: {video_path}")
243
- cap = cv2.VideoCapture(video_path)
244
-
245
- if not cap.isOpened():
246
- print(f"Error: Could not open video {video_path}")
247
- return False
248
-
249
- fps = cap.get(cv2.CAP_PROP_FPS)
250
- original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
251
- original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
252
-
253
- start_seconds = timestamp_to_seconds(start_time)
254
- end_seconds = timestamp_to_seconds(end_time)
255
- duration = end_seconds - start_seconds
256
-
257
- print(f"Video info: {fps} fps, {original_width}x{original_height}")
258
- print(f"Extracting segment: {start_time} to {end_time} ({duration:.1f}s)")
259
-
260
- # ── Step 1: Extract audio → WAV ───────────────────────────────────────
261
- print(" Extracting audio segment...")
262
- audio_ok = extract_audio_segment(video_path, start_seconds, end_seconds, temp_wav)
263
-
264
- # ── Step 2: Transcribe with Whisper ───────────────────────────────────
265
- if audio_ok and whisper_model is not None:
266
- captions = transcribe_segment(temp_wav)
267
- else:
268
- if not audio_ok:
269
- print(" ✗ Skipping transcription: audio extraction failed")
270
- elif whisper_model is None:
271
- print(" ✗ Skipping transcription: Whisper model not ready")
272
- captions = []
273
-
274
- frame_caption_map = build_frame_caption_map(captions, fps)
275
-
276
- # ── Step 3: Process frames → pipe to FFmpeg ───────────────────────────
277
- ffmpeg_video_cmd = [
278
- "ffmpeg", "-y",
279
- "-f", "rawvideo",
280
- "-vcodec", "rawvideo",
281
- "-s", f"{target_width}x{target_height}",
282
- "-pix_fmt", "bgr24",
283
- "-r", str(fps),
284
- "-i", "pipe:0",
285
- "-vcodec", "libx264",
286
- "-preset", "fast",
287
- "-crf", "23",
288
- "-pix_fmt", "yuv420p",
289
- temp_video_path
290
  ]
291
-
292
- ffmpeg_video_proc = subprocess.Popen(
293
- ffmpeg_video_cmd,
294
- stdin=subprocess.PIPE,
295
- stdout=subprocess.DEVNULL,
296
- stderr=subprocess.DEVNULL
297
- )
298
-
299
- start_frame = int(start_seconds * fps)
300
- cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
301
-
302
- current_caption = ""
303
- processed_frames = 0
304
- target_frames = int(duration * fps)
305
-
306
- print(f"Processing {target_frames} frames...")
307
-
308
- while processed_frames < target_frames:
309
  ret, frame = cap.read()
310
- if not ret:
311
- print(f"Warning: Could not read frame at position {processed_frames}")
312
- break
313
-
314
- aspect_ratio = target_width / target_height
315
- if original_width / original_height > aspect_ratio:
316
- new_width = int(original_height * aspect_ratio)
317
- x_offset = (original_width - new_width) // 2
318
- frame = frame[:, x_offset:x_offset + new_width]
319
  else:
320
- new_height = int(original_width / aspect_ratio)
321
- y_offset = (original_height - new_height) // 2
322
- frame = frame[y_offset:y_offset + new_height, :]
323
-
324
- frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_LANCZOS4)
325
- frame = apply_color_grading_wedding_retro(frame)
326
-
327
- # Set caption for this frame (empty if none).
328
- current_caption = frame_caption_map.get(processed_frames, "")
329
-
330
- if current_caption:
331
- frame = burn_captions_to_frame(frame, current_caption)
332
-
333
- ffmpeg_video_proc.stdin.write(frame.tobytes())
334
- processed_frames += 1
335
-
336
- if processed_frames % max(1, target_frames // 10) == 0:
337
- progress = (processed_frames / target_frames) * 100
338
- print(f"Progress: {progress:.1f}%")
339
-
340
- ffmpeg_video_proc.stdin.close()
341
- ffmpeg_video_proc.wait()
342
  cap.release()
343
-
344
- if ffmpeg_video_proc.returncode != 0:
345
- print(f"✗ FFmpeg video encoding failed (code {ffmpeg_video_proc.returncode})")
346
- return False
347
-
348
- print("✓ Frames encoded, muxing audio...")
349
-
350
- # ── Step 4: Mux processed video + original audio ──────────────────────
351
- ffmpeg_mux_cmd = [
352
- "ffmpeg", "-y",
353
- "-i", temp_video_path,
354
- "-ss", str(start_seconds),
355
- "-to", str(end_seconds),
356
- "-i", video_path,
357
- "-map", "0:v:0",
358
- "-map", "1:a:0",
359
- "-c:v", "copy",
360
- "-c:a", "aac",
361
- "-b:a", "192k",
362
- "-shortest",
363
- "-movflags", "+faststart",
364
- output_path
365
- ]
366
-
367
- mux_result = subprocess.run(
368
- ffmpeg_mux_cmd,
369
- stdout=subprocess.DEVNULL,
370
- stderr=subprocess.DEVNULL
371
- )
372
-
373
- if mux_result.returncode != 0:
374
- print(f"✗ FFmpeg audio mux failed (code {mux_result.returncode})")
375
- return False
376
-
377
- print(f"✓ Segment complete: {output_path}")
378
- return True
379
-
380
  except Exception as e:
381
- print(f"Error processing video segment: {e}")
382
- if ffmpeg_video_proc is not None:
383
- try:
384
- ffmpeg_video_proc.stdin.close()
385
- except Exception:
386
- pass
387
- ffmpeg_video_proc.wait()
388
  return False
389
-
390
  finally:
391
- for tmp in [temp_video_path, temp_wav]:
392
- if tmp and os.path.exists(tmp):
393
- try:
394
- os.remove(tmp)
395
- except Exception:
396
- pass
397
-
398
-
399
- async def process_movie_segments(movie_name: str) -> bool:
400
- """Process all segments for a movie."""
401
- try:
402
- processing_state["current_file"] = movie_name
403
- print(f"\n{'='*80}")
404
- print(f"Processing movie: {movie_name}")
405
- print(f"{'='*80}")
406
-
407
- video_file = f"{movie_name}.mkv"
408
- print(f"Downloading video: {video_file}")
409
-
410
- try:
411
- video_path = hf_hub_download(
412
- repo_id=HF_DATASET_REPO,
413
- filename=video_file,
414
- repo_type="dataset",
415
- token=HF_TOKEN,
416
- cache_dir="/tmp/video_processor_cache"
417
- )
418
- if os.path.islink(video_path):
419
- video_path = os.path.realpath(video_path)
420
- except Exception as e:
421
- print(f"Error: Could not download video: {e}")
422
- return False
423
-
424
- hooks_folder = f"{HOOKS_FOLDER}/{movie_name}"
425
- print(f"Listing segments from: {hooks_folder}")
426
-
427
- files = list_repo_files(
428
- repo_id=HF_DATASET_REPO,
429
- repo_type="dataset",
430
- token=HF_TOKEN
431
- )
432
-
433
- segment_files = sorted([
434
- f for f in files
435
- if f.startswith(f"{hooks_folder}/") and f.endswith(".json")
436
- ])
437
-
438
- if not segment_files:
439
- print(f"No segment JSON files found for {movie_name}")
440
- return False
441
-
442
- print(f"Found {len(segment_files)} segments")
443
- temp_dir = tempfile.mkdtemp()
444
-
445
- try:
446
- for segment_file in segment_files:
447
- try:
448
- segment_path = hf_hub_download(
449
- repo_id=HF_DATASET_REPO,
450
- filename=segment_file,
451
- repo_type="dataset",
452
- token=HF_TOKEN,
453
- cache_dir="/tmp/video_processor_cache"
454
- )
455
-
456
- with open(segment_path, 'r', encoding='utf-8') as f:
457
- segment_data = json.load(f)
458
-
459
- segment_number = segment_data.get("segment_number", 1)
460
- start_time = segment_data.get("start_time", "00:00:00")
461
- end_time = segment_data.get("end_time", "00:10:00")
462
-
463
- print(f"\nProcessing segment {segment_number}: {start_time} to {end_time}")
464
-
465
- output_filename = f"segment-{segment_number:02d}.mp4"
466
- output_path = os.path.join(temp_dir, output_filename)
467
-
468
- success = process_video_segment(
469
- video_path,
470
- output_path,
471
- start_time,
472
- end_time
473
- )
474
-
475
- if not success:
476
- print(f"Failed to process segment {segment_number}")
477
- continue
478
-
479
- upload_path = f"{READY_VIDEOS_FOLDER}/{movie_name}/{output_filename}"
480
- print(f"Uploading to: {upload_path}")
481
-
482
- upload_file(
483
- path_or_fileobj=output_path,
484
- path_in_repo=upload_path,
485
- repo_id=HF_DATASET_REPO,
486
- repo_type="dataset",
487
- token=HF_TOKEN,
488
- commit_message=f"Add processed video segment {segment_number} for {movie_name}"
489
- )
490
- print(f"✓ Segment {segment_number} uploaded successfully")
491
-
492
- except Exception as e:
493
- print(f"✗ Error processing segment: {e}")
494
- processing_state["error_count"] += 1
495
- continue
496
-
497
- finally:
498
- import shutil
499
- shutil.rmtree(temp_dir, ignore_errors=True)
500
-
501
- processing_state["processed_files"].append(movie_name)
502
- processing_state["total_processed"] += 1
503
- print(f"\n✓ Successfully processed all segments for {movie_name}")
504
- return True
505
-
506
- except Exception as e:
507
- processing_state["error_count"] += 1
508
- processing_state["last_error"] = str(e)
509
- print(f"✗ Error: {e}")
510
- return False
511
-
512
-
513
- async def scan_and_process_videos():
514
- """Scan hooks folder and process all movies."""
515
- if processing_state["is_running"]:
516
- print("Video processing already running, skipping...")
517
- return
518
-
519
- # Wait for Space to fully initialize (reduced for testing)
520
- startup_delay = int(os.getenv("STARTUP_DELAY", 5)) # Default 5 seconds for testing
521
- print(f"Waiting {startup_delay} seconds before starting video processing...")
522
- await asyncio.sleep(startup_delay)
523
 
 
 
524
  processing_state["is_running"] = True
525
- print("\n" + "="*80)
526
- print("STARTING VIDEO PROCESSING SERVICE")
527
- print("="*80)
528
-
529
  try:
530
- files = list_repo_files(
531
- repo_id=HF_DATASET_REPO,
532
- repo_type="dataset",
533
- token=HF_TOKEN
534
- )
535
-
536
- movie_folders = set()
537
- for f in files:
538
- if f.startswith(f"{HOOKS_FOLDER}/") and f.endswith(".json"):
539
- parts = f.split("/")
540
- if len(parts) >= 2:
541
- movie_folders.add(parts[1])
542
-
543
- print(f"Found {len(movie_folders)} movies to process")
544
-
545
- for movie_name in sorted(movie_folders):
546
- await process_movie_segments(movie_name)
547
  await asyncio.sleep(2)
548
-
549
- print("\n" + "="*80)
550
- print("VIDEO PROCESSING COMPLETE")
551
- print(f"Processed: {processing_state['total_processed']}")
552
- print(f"Errors: {processing_state['error_count']}")
553
- print("="*80 + "\n")
554
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  except Exception as e:
556
- print(f"Critical error: {e}")
557
  processing_state["last_error"] = str(e)
558
  finally:
559
  processing_state["is_running"] = False
560
-
561
 
562
  @app.on_event("startup")
563
  async def startup_event():
564
- """Load Whisper in background, then kick off video processing after 3 min."""
565
- loop = asyncio.get_event_loop()
566
- # Load Whisper model in thread so it doesn't block the event loop / health check
567
- await loop.run_in_executor(None, _load_whisper_model)
568
- # Kick off processing task (has its own 3-min delay inside)
569
- asyncio.create_task(scan_and_process_videos())
570
-
571
 
572
  @app.get("/")
573
- async def health():
574
- return JSONResponse({
575
- "status": "running",
576
- "service": "Video Processing Service",
577
- "whisper_ready": processing_state["whisper_ready"],
578
- "is_processing": processing_state["is_running"],
579
- "total_processed": processing_state["total_processed"],
580
- "error_count": processing_state["error_count"],
581
- "current_file": processing_state["current_file"],
582
- "last_error": processing_state["last_error"],
583
- "processed_files": processing_state["processed_files"]
584
- })
585
-
586
-
587
  @app.get("/status")
588
- async def get_status():
589
- return JSONResponse({
590
- "whisper_ready": processing_state["whisper_ready"],
591
- "is_running": processing_state["is_running"],
592
- "total_processed": processing_state["total_processed"],
593
- "error_count": processing_state["error_count"],
594
- "current_file": processing_state["current_file"],
595
- "last_error": processing_state["last_error"],
596
- "processed_files": processing_state["processed_files"]
597
- })
598
-
599
-
600
- @app.post("/trigger-processing")
601
- async def trigger_processing():
602
- if processing_state["is_running"]:
603
- return JSONResponse({
604
- "status": "already_running",
605
- "message": "Video processing is already in progress"
606
- })
607
- if not processing_state["whisper_ready"]:
608
- return JSONResponse({
609
- "status": "not_ready",
610
- "message": "Whisper model is still loading, try again shortly"
611
- })
612
- asyncio.create_task(scan_and_process_videos())
613
- return JSONResponse({
614
- "status": "started",
615
- "message": "Video processing scan started"
616
- })
617
-
618
 
619
  if __name__ == "__main__":
620
- print("Starting Video Processing Service on port 7860...")
621
- print("Whisper will load at startup, processing begins 3 minutes after")
622
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  #!/usr/bin/env python3
2
  import os
3
  import json
 
4
  import asyncio
5
  import tempfile
6
  import subprocess
7
+ import shutil
8
+ import time
9
  from pathlib import Path
10
  from datetime import datetime
11
  from dotenv import load_dotenv
12
  from typing import List, Dict, Optional, Tuple
13
 
14
+ from fastapi import FastAPI
15
  from fastapi.responses import JSONResponse
16
  import uvicorn
17
 
 
23
  from faster_whisper import WhisperModel
24
  except ImportError as e:
25
  print(f"Missing dependency: {e}")
 
26
  exit(1)
27
 
28
  # Load environment variables
29
  load_dotenv()
30
  HF_TOKEN = os.getenv("HF_TOKEN")
31
+ HF_DATASET_REPO = "factorstudios/movs"
32
+ HOOKS_FOLDER = "hooks"
33
+ READY_VIDEOS_FOLDER = "ready_videos"
34
 
35
  app = FastAPI(title="Video Processing Service")
36
 
 
42
  "error_count": 0,
43
  "last_error": None,
44
  "processed_files": [],
45
+ "whisper_ready": False,
46
+ "log": []
47
  }
48
 
 
49
  whisper_model = None
50
 
51
def add_log(msg, max_entries=100):
    """Print a timestamped message and keep it in the in-memory status log.

    Args:
        msg: Text to record. It is prefixed with the local wall-clock time
            formatted as ``[HH:MM:SS]``.
        max_entries: Maximum number of messages retained in
            ``processing_state["log"]``; the oldest entries are dropped first.
    """
    timestamp = datetime.now().strftime('%H:%M:%S')
    formatted_msg = f"[{timestamp}] {msg}"
    print(formatted_msg)

    # Mirror the message into shared state so it can be read back via the API.
    log = processing_state["log"]
    log.append(formatted_msg)

    # Trim everything beyond the cap in one step; this also recovers if the
    # list was ever allowed to grow past the cap by more than one entry.
    overflow = len(log) - max(max_entries, 0)
    if overflow > 0:
        del log[:overflow]
61
 
62
def _load_whisper_model():
    """Construct the Whisper "small" model and publish it as ``whisper_model``.

    The call itself is synchronous, so code running on the event loop should
    execute it in a worker thread.

    On success ``processing_state["whisper_ready"]`` is set to True. On
    failure the exception is not re-raised: it is logged and recorded in
    ``processing_state["last_error"]`` / ``["error_count"]`` so the failure is
    visible to anything inspecting the shared state, and ``whisper_ready``
    stays False.
    """
    global whisper_model
    try:
        add_log("Starting Whisper model load...")
        whisper_model = WhisperModel("small", device="auto", compute_type="int8")
    except Exception as e:
        # Boundary handler: a load failure must not take the service down,
        # but it must be observable rather than only printed.
        processing_state["error_count"] += 1
        processing_state["last_error"] = f"Whisper model load failed: {e}"
        add_log(f"✗ Failed to load Whisper model: {e}")
    else:
        processing_state["whisper_ready"] = True
        add_log("✓ Whisper model loaded successfully")
72
 
73
def timestamp_to_seconds(timestamp: str) -> float:
    """Convert a clock-style timestamp to a number of seconds.

    Accepted forms are ``HH:MM:SS``, ``MM:SS`` and plain ``SS``; the seconds
    field may carry a fractional part (``"00:01:02.5"``).

    Args:
        timestamp: The timestamp text.

    Returns:
        The total number of seconds as a float, or ``0.0`` when the value is
        not a string, has more than three fields, or contains a field that
        cannot be parsed as a number.
    """
    try:
        parts = timestamp.strip().split(":")
        if not 1 <= len(parts) <= 3:
            return 0.0
        *leading, seconds = parts
        total = float(seconds)
        # Walk the remaining fields right-to-left: minutes first, then hours.
        for multiplier, field in zip((60, 3600), reversed(leading)):
            total += int(field) * multiplier
        return total
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a non-numeric field: keep the "0.0 on bad
        # input" contract without swallowing unrelated exceptions.
        return 0.0
81
 
82
def apply_color_grading(frame):
    """Boost local contrast and sharpen a BGR frame.

    The lightness channel of the LAB representation is equalised with CLAHE,
    the image is converted back to BGR, and the result is blended 40/60 with
    a 3x3-kernel sharpened copy of itself.

    Args:
        frame: Image in BGR channel order, as accepted by ``cv2.cvtColor``.

    Returns:
        The graded frame produced by ``cv2.addWeighted``.
    """
    # Contrast-limited histogram equalisation on lightness only, so the two
    # colour channels pass through untouched.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(frame, cv2.COLOR_BGR2LAB))
    equaliser = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    lightness = equaliser.apply(lightness)
    merged_lab = cv2.merge([lightness, chan_a, chan_b])
    contrasted = cv2.cvtColor(merged_lab, cv2.COLOR_LAB2BGR)

    # Sharpening kernel, scaled down by 1.2.
    sharpen_kernel = np.array([
        [-1, -1, -1],
        [-1, 9, -1],
        [-1, -1, -1],
    ]) / 1.2
    crisp = cv2.filter2D(contrasted, -1, sharpen_kernel)
    return cv2.addWeighted(contrasted, 0.4, crisp, 0.6, 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
_CAPTION_FONT_PATH = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
_caption_font_cache = {}


def _get_caption_font(font_size):
    """Return the caption font for *font_size*, loading it once per size."""
    font = _caption_font_cache.get(font_size)
    if font is None:
        try:
            font = ImageFont.truetype(_CAPTION_FONT_PATH, font_size)
        except (OSError, ImportError, ValueError):
            # Font file missing/unreadable, FreeType support unavailable, or
            # an unusable size: fall back to Pillow's built-in font.
            font = ImageFont.load_default()
        _caption_font_cache[font_size] = font
    return font


def burn_captions(frame, text, font_size=40):
    """Draw word-wrapped, centred caption text with a drop shadow on a frame.

    Args:
        frame: Image in BGR channel order (array with ``shape[:2] == (h, w)``).
        text: Caption text; it is split on whitespace and wrapped so each
            line stays narrower than the frame width minus 100 pixels.
        font_size: Font size passed to the TrueType loader; also determines
            the line spacing (``font_size + 10``).

    Returns:
        A new BGR array with the caption composited onto the frame.
    """
    h, w = frame.shape[:2]
    base = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).convert('RGBA')
    # Draw on a transparent overlay and alpha-composite afterwards: drawing
    # straight onto the image overwrites pixels instead of blending, so the
    # shadow's alpha value would have no effect.
    overlay = Image.new('RGBA', base.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    font = _get_caption_font(font_size)

    max_width = w - 100
    lines, curr = [], []
    for word in text.split():
        test = ' '.join(curr + [word])
        if draw.textbbox((0, 0), test, font=font)[2] < max_width:
            curr.append(word)
        else:
            # A single word wider than the limit arrives with nothing
            # pending; do not emit an empty line in that case.
            if curr:
                lines.append(' '.join(curr))
            curr = [word]
    if curr:
        lines.append(' '.join(curr))

    line_height = font_size + 10
    # Start at 80% of the frame height, but move up when the wrapped text
    # would otherwise run past the bottom edge.
    y = max(0, min(int(h * 0.8), h - len(lines) * line_height))
    for line in lines:
        bbox = draw.textbbox((0, 0), line, font=font)
        x = (w - (bbox[2] - bbox[0])) // 2
        draw.text((x + 2, y + 2), line, font=font, fill=(0, 0, 0, 180))
        draw.text((x, y), line, font=font, fill=(255, 255, 255, 255))
        y += line_height

    composed = Image.alpha_composite(base, overlay).convert('RGB')
    return cv2.cvtColor(np.array(composed), cv2.COLOR_RGB2BGR)
117
+
118
def process_video_sync(video_path, output_path, start_t, end_t):
    """Cut one segment out of a video, grade and caption it, and write an MP4.

    This function is blocking. The steps are:
      1. stream-copy ``start_t``..``end_t`` of ``video_path`` into a temp clip;
      2. extract the clip's audio as 16 kHz mono WAV and, if the Whisper
         model is loaded, transcribe it into timed captions;
      3. read the clip frame by frame, centre-crop to 1080x1350, apply colour
         grading and any active caption, and pipe raw frames into FFmpeg;
      4. mux the encoded video with the clip's first audio stream.

    Args:
        video_path: Path of the source video.
        output_path: Path of the final MP4; temp files are created next to it.
        start_t: Segment start, in a format ``timestamp_to_seconds`` accepts.
        end_t: Segment end, in the same format.

    Returns:
        True when every FFmpeg step succeeded and ``output_path`` exists,
        otherwise False. Exceptions are logged and reported as False.
    """
    temp_seg = output_path + ".seg.mp4"
    temp_no_audio = output_path + ".noaudio.mp4"
    temp_wav = output_path + ".wav"
    cap = None
    proc = None
    try:
        start_s = timestamp_to_seconds(start_t)
        end_s = timestamp_to_seconds(end_t)
        if end_s <= start_s:
            add_log(f"Invalid segment range: {start_t} to {end_t}")
            return False

        cut = subprocess.run(["ffmpeg", "-y", "-ss", str(start_s), "-to", str(end_s), "-i", video_path, "-c", "copy", temp_seg], capture_output=True)
        if cut.returncode != 0 or not os.path.exists(temp_seg):
            add_log(f"FFmpeg segment cut failed (code {cut.returncode})")
            return False

        wav = subprocess.run(["ffmpeg", "-y", "-i", temp_seg, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", temp_wav], capture_output=True)
        captions = []
        if wav.returncode != 0 or not os.path.exists(temp_wav):
            # Without a WAV there is nothing to transcribe; keep going so the
            # failure is reported by the step that actually needs the audio.
            add_log(f"FFmpeg audio extraction failed (code {wav.returncode}); skipping captions")
        elif whisper_model:
            segs, _ = whisper_model.transcribe(temp_wav)
            captions = [(s.start, s.end, s.text.strip()) for s in segs if s.text.strip()]

        cap = cv2.VideoCapture(temp_seg)
        if not cap.isOpened():
            add_log(f"Could not open segment clip: {temp_seg}")
            return False
        fps = cap.get(cv2.CAP_PROP_FPS) or 24
        width, height = 1080, 1350
        target_ratio = width / height
        ffmpeg_cmd = [
            "ffmpeg", "-y", "-f", "rawvideo", "-vcodec", "rawvideo", "-s", f"{width}x{height}",
            "-pix_fmt", "bgr24", "-r", str(fps), "-i", "pipe:0", "-vcodec", "libx264",
            "-preset", "veryfast", "-crf", "22", "-pix_fmt", "yuv420p", temp_no_audio
        ]
        proc = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE, stderr=subprocess.DEVNULL)

        f_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Centre-crop to the target aspect ratio before resizing.
            h, w = frame.shape[:2]
            if w / h > target_ratio:
                nw = int(h * target_ratio)
                off = (w - nw) // 2
                frame = frame[:, off:off + nw]
            else:
                nh = int(w / target_ratio)
                off = (h - nh) // 2
                frame = frame[off:off + nh, :]
            frame = cv2.resize(frame, (width, height))
            frame = apply_color_grading(frame)
            # Caption times are relative to the start of the cut clip, as is
            # the frame timestamp computed here.
            ts = f_idx / fps
            for s, e, t in captions:
                if s <= ts <= e:
                    frame = burn_captions(frame, t)
                    break
            proc.stdin.write(frame.tobytes())
            f_idx += 1

        proc.stdin.close()
        encode_code = proc.wait()
        cap.release()
        cap = None
        if f_idx == 0:
            add_log("No frames could be read from the segment clip")
            return False
        if encode_code != 0:
            add_log(f"FFmpeg video encoding failed (code {encode_code})")
            return False

        mux = subprocess.run(["ffmpeg", "-y", "-i", temp_no_audio, "-i", temp_seg, "-map", "0:v:0", "-map", "1:a:0", "-c", "copy", "-shortest", output_path], capture_output=True)
        if mux.returncode != 0:
            add_log(f"FFmpeg audio mux failed (code {mux.returncode})")
            return False
        return os.path.exists(output_path)
    except Exception as e:
        add_log(f"Error in sync process: {e}")
        return False
    finally:
        # Release the capture and make sure the encoder is not left running
        # before the temp files it may still hold open are removed.
        if cap is not None:
            cap.release()
        if proc is not None:
            if proc.stdin is not None and not proc.stdin.closed:
                try:
                    proc.stdin.close()
                except OSError:
                    pass  # encoder already gone; nothing left to flush
            if proc.poll() is None:
                proc.kill()
                proc.wait()
        for f in [temp_seg, temp_no_audio, temp_wav]:
            if os.path.exists(f):
                try:
                    os.remove(f)
                except OSError as e:
                    add_log(f"Could not remove temp file {f}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
async def _process_movie_hooks(movie, movie_hooks):
    """Download one movie and process/upload each of its hook segments."""
    video_path = await asyncio.to_thread(
        hf_hub_download,
        repo_id=HF_DATASET_REPO, filename=f"{movie}.mkv", repo_type="dataset", token=HF_TOKEN,
    )
    add_log(f"Found {len(movie_hooks)} segments for {movie}")
    temp_dir = tempfile.mkdtemp()
    try:
        for hook_file in movie_hooks:
            await asyncio.sleep(0.1)
            try:
                hook_path = await asyncio.to_thread(
                    hf_hub_download,
                    repo_id=HF_DATASET_REPO, filename=hook_file, repo_type="dataset", token=HF_TOKEN,
                )
                with open(hook_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                num = data.get("segment_number", 1)
                start = data.get("start_time", "00:00:00")
                end = data.get("end_time", "00:00:10")
                out_name = f"segment-{num:02d}.mp4"
                out_path = os.path.join(temp_dir, out_name)
                add_log(f"Processing Segment {num} ({start} to {end})")
                success = await asyncio.to_thread(process_video_sync, video_path, out_path, start, end)
                if success:
                    await asyncio.to_thread(
                        upload_file,
                        path_or_fileobj=out_path,
                        path_in_repo=f"{READY_VIDEOS_FOLDER}/{movie}/{out_name}",
                        repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN,
                    )
                    add_log(f"✓ Segment {num} uploaded successfully")
                else:
                    processing_state["error_count"] += 1
                    add_log(f"✗ Segment {num} failed")
            except Exception as e:
                # One bad hook file or upload must not stop the other segments.
                processing_state["error_count"] += 1
                processing_state["last_error"] = str(e)
                add_log(f"✗ Error processing {hook_file}: {e}")
    finally:
        # Always drop the scratch directory, including on errors.
        shutil.rmtree(temp_dir, ignore_errors=True)


async def run_processing_loop():
    """Load Whisper, scan the dataset repo for hooks and process every movie.

    Does nothing if a run is already in progress. Blocking work (model load,
    Hub listing/download/upload, video processing) is pushed to worker
    threads so the event loop stays responsive. A failure in one segment or
    movie is counted in ``processing_state`` and the run continues with the
    next one; only failures outside the per-movie loop abort the run.
    """
    if processing_state["is_running"]:
        return
    processing_state["is_running"] = True
    try:
        add_log("Waiting 5 seconds for server to settle...")
        await asyncio.sleep(5)

        # Load the model after the delay. Awaiting the worker thread directly
        # (instead of polling a flag) means a failed load cannot hang the loop.
        add_log("Initiating background tasks...")
        if not processing_state["whisper_ready"]:
            await asyncio.to_thread(_load_whisper_model)
        if not processing_state["whisper_ready"]:
            raise RuntimeError("Whisper model failed to load; aborting processing run")

        add_log("Starting repository scan...")
        files = await asyncio.to_thread(
            list_repo_files, repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN
        )

        # Group hook files by movie folder: hooks/<movie>/<segment>.json.
        # JSON files sitting directly in the hooks folder have no movie
        # component and are ignored.
        hook_prefix = HOOKS_FOLDER + "/"
        hooks_by_movie = {}
        for path in files:
            parts = path.split("/")
            if path.startswith(hook_prefix) and path.endswith(".json") and len(parts) >= 3:
                hooks_by_movie.setdefault(parts[1], []).append(path)
        movies = sorted(hooks_by_movie)

        add_log(f"Found {len(movies)} movies to process")
        for movie in movies:
            processing_state["current_file"] = movie
            add_log(f"--- Processing Movie: {movie} ---")
            try:
                await _process_movie_hooks(movie, sorted(hooks_by_movie[movie]))
            except Exception as e:
                # E.g. the movie file could not be downloaded; move on.
                processing_state["error_count"] += 1
                processing_state["last_error"] = str(e)
                add_log(f"✗ Movie {movie} failed: {e}")
                continue
            processing_state["processed_files"].append(movie)
            processing_state["total_processed"] += 1
            add_log(f"Finished movie: {movie}")

    except Exception as e:
        add_log(f"CRITICAL ERROR: {e}")
        processing_state["last_error"] = str(e)
    finally:
        processing_state["is_running"] = False
        add_log("Background worker idle.")
227
 
228
@app.on_event("startup")
async def startup_event():
    """Start the background processing loop when the application starts.

    The loop itself handles the initial delay and the model loading, so this
    handler only schedules it and returns immediately.
    """
    # Keep a strong reference to the task: the event loop holds tasks only
    # weakly, so a discarded create_task() result may be garbage-collected
    # before the coroutine has finished.
    app.state.processing_task = asyncio.create_task(run_processing_loop())
 
 
 
 
 
232
 
233
@app.get("/")
@app.get("/status")
async def status():
    """Return the shared processing-state dictionary.

    Registered on both ``/`` and ``/status``; the response body is whatever
    ``processing_state`` currently holds.
    """
    current_state = processing_state
    return current_state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
if __name__ == "__main__":
    # Script entry point: announce the start-up through the shared logger,
    # then hand control to uvicorn, listening on all interfaces.
    bind_host, bind_port = "0.0.0.0", 7860
    add_log("Starting Video Processing Service on port 7860...")
    uvicorn.run(app, host=bind_host, port=bind_port)