MogensR committed on
Commit
c268795
·
1 Parent(s): 82f3861

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -83
app.py CHANGED
@@ -7,6 +7,7 @@
7
  FIXED: All issues identified by Grok4 - robust error handling, variable scope, codec fallbacks
8
  FIXED: Added SSR mode disable for Gradio compatibility
9
  FIXED: Audio preservation - no more missing audio in processed videos
 
10
  """
11
  import cv2
12
  import numpy as np
@@ -49,6 +50,7 @@ def patched_get_type(schema):
49
 
50
  except (ImportError, AttributeError) as e:
51
  print(f"❌ CRITICAL: Gradio patch failed: {e}")
 
52
 
53
  # Import utilities - CRITICAL: Use these functions, don't duplicate!
54
  from utilities import (
@@ -65,8 +67,10 @@ def patched_get_type(schema):
65
  try:
66
  from two_stage_processor import TwoStageProcessor, CHROMA_PRESETS
67
  TWO_STAGE_AVAILABLE = True
68
- except ImportError:
 
69
  TWO_STAGE_AVAILABLE = False
 
70
 
71
  logging.basicConfig(level=logging.INFO)
72
  logger = logging.getLogger(__name__)
@@ -101,7 +105,6 @@ def get_device():
101
  """Automatically detect the best available device (CPU or GPU) with robust error handling"""
102
  try:
103
  if torch.cuda.is_available():
104
- # Try to get device name safely
105
  try:
106
  device_name = torch.cuda.get_device_name(0)
107
  logger.info(f"Using GPU: {device_name}")
@@ -109,7 +112,6 @@ def get_device():
109
  logger.warning(f"Could not get GPU name: {e}, but CUDA is available")
110
  device_name = "CUDA GPU"
111
 
112
- # Test CUDA functionality
113
  try:
114
  test_tensor = torch.tensor([1.0], device='cuda')
115
  del test_tensor
@@ -169,7 +171,6 @@ def create_video_writer(output_path: str, fps: float, width: int, height: int) -
169
  for fourcc_str, ext in codecs_to_try:
170
  try:
171
  fourcc = cv2.VideoWriter_fourcc(*fourcc_str)
172
- # Ensure output has correct extension
173
  if not output_path.endswith(ext):
174
  base = os.path.splitext(output_path)[0]
175
  test_path = base + ext
@@ -197,7 +198,6 @@ def _prog(pct: float, desc: str):
197
  if progress_callback:
198
  progress_callback(pct, desc)
199
 
200
- # Format progress info for display in the UI
201
  if "Frame" in desc and "|" in desc:
202
  parts = desc.split("|")
203
  frame_info = parts[0].strip() if len(parts) > 0 else ""
@@ -221,61 +221,61 @@ def _prog(pct: float, desc: str):
221
  try:
222
  _prog(0.1, "Initializing SAM2...")
223
 
224
- # Check HF token and environment
225
  hf_token = os.getenv('HF_TOKEN')
226
  if not hf_token:
227
  logger.warning("No HF_TOKEN found, downloads may be rate limited")
228
 
229
- # Download checkpoint with caching and robust error handling
230
- # Use the correct Facebook repository path
231
  try:
232
  checkpoint_path = hf_hub_download(
233
- repo_id="facebook/sam2-hiera-large", # FIXED: Correct repository
234
  filename="sam2_hiera_large.pt",
235
  cache_dir=str(CACHE_DIR / "sam2_checkpoint"),
236
  force_download=False,
237
  token=hf_token
238
  )
 
239
  except Exception as e:
240
  logger.error(f"Failed to download SAM2 checkpoint: {e}")
241
  raise Exception(f"SAM2 checkpoint download failed: {e}")
242
 
243
- # Import and build
244
  try:
245
  from sam2.build_sam import build_sam2
246
  from sam2.sam2_image_predictor import SAM2ImagePredictor
 
247
  except ImportError as e:
 
248
  raise Exception(f"SAM2 import failed: {e}. Make sure SAM2 is properly installed.")
249
 
250
- # Build model with explicit config
251
  try:
252
  sam2_model = build_sam2("sam2_hiera_l.yaml", checkpoint_path)
253
  sam2_model.to(device)
254
- sam2_model.eval() # Set to evaluation mode for inference
255
  predictor = SAM2ImagePredictor(sam2_model)
 
256
  except Exception as e:
 
257
  raise Exception(f"SAM2 model creation failed: {e}")
258
 
259
- # Test the predictor with dummy data - ROBUST TYPES
260
  _prog(0.8, "Testing SAM2 functionality...")
261
  test_image = np.zeros((256, 256, 3), dtype=np.uint8)
262
  predictor.set_image(test_image)
263
 
264
- # Ensure correct types and shapes for SAM2
265
- test_points = np.array([[128.0, 128.0]], dtype=np.float32) # Explicit float32
266
- test_labels = np.array([1], dtype=np.int32) # Explicit int32
267
 
268
  try:
269
- with torch.no_grad(): # Disable gradients for inference
270
  masks, scores, _ = predictor.predict(
271
  point_coords=test_points,
272
  point_labels=test_labels,
273
  multimask_output=False
274
  )
275
  except Exception as e:
 
276
  raise Exception(f"SAM2 prediction test failed: {e}")
277
 
278
  if masks is None or len(masks) == 0:
 
279
  raise Exception("SAM2 predictor test failed - no masks generated")
280
 
281
  _prog(1.0, "SAM2 loaded and validated successfully!")
@@ -301,21 +301,23 @@ def _prog(pct: float, desc: str):
301
 
302
  try:
303
  from matanyone import InferenceCore
 
304
  except ImportError as e:
 
305
  raise Exception(f"MatAnyone import failed: {e}. Make sure MatAnyone is properly installed.")
306
 
307
  try:
308
  processor = InferenceCore("PeiqingYang/MatAnyone")
 
309
  except Exception as e:
 
310
  raise Exception(f"MatAnyone model loading failed: {e}")
311
 
312
- # Test MatAnyone with dummy data
313
  _prog(0.8, "Testing MatAnyone functionality...")
314
  test_image = np.zeros((256, 256, 3), dtype=np.uint8)
315
  test_mask = np.zeros((256, 256), dtype=np.uint8)
316
  test_mask[64:192, 64:192] = 255
317
 
318
- # Test the processor
319
  try:
320
  if hasattr(processor, 'process') or hasattr(processor, '__call__'):
321
  logger.info("MatAnyone processor interface detected")
@@ -362,6 +364,7 @@ def load_models_with_validation(progress_callback: Optional[callable] = None) ->
362
 
363
  with loading_lock:
364
  if models_loaded and not PROCESS_CANCELLED.is_set():
 
365
  return "Models already loaded and validated"
366
 
367
  try:
@@ -372,24 +375,27 @@ def load_models_with_validation(progress_callback: Optional[callable] = None) ->
372
  if progress_callback:
373
  progress_callback(0.0, f"Starting model loading on {DEVICE}...")
374
 
375
- # Load SAM2 with validation
376
  sam2_predictor = load_sam2_predictor_fixed(device=DEVICE, progress_callback=progress_callback)
377
 
378
  if PROCESS_CANCELLED.is_set():
 
379
  return "Model loading cancelled by user"
380
 
381
- # Load MatAnyone with validation
382
  matanyone_model = load_matanyone_fixed(progress_callback=progress_callback)
383
 
384
  if PROCESS_CANCELLED.is_set():
 
385
  return "Model loading cancelled by user"
386
 
387
  models_loaded = True
388
 
389
- # Initialize two-stage processor if available
390
  if TWO_STAGE_AVAILABLE:
391
- two_stage_processor = TwoStageProcessor(sam2_predictor, matanyone_model)
392
- logger.info("Two-stage processor initialized")
 
 
 
 
393
 
394
  load_time = time.time() - start_time
395
  message = f"SUCCESS: SAM2 + MatAnyone loaded and validated in {load_time:.1f}s on {DEVICE}"
@@ -421,17 +427,20 @@ def process_video_fixed(
421
  global PROCESS_CANCELLED
422
 
423
  if PROCESS_CANCELLED.is_set():
 
424
  return None, "Processing cancelled by user"
425
 
426
  if not models_loaded:
 
427
  return None, "Models not loaded. Call load_models_with_validation() first."
428
 
429
  if not video_path or not os.path.exists(video_path):
 
430
  return None, f"Video file not found: {video_path}"
431
 
432
- # Validate video file
433
  is_valid, validation_msg = validate_video_file(video_path)
434
  if not is_valid:
 
435
  return None, f"Invalid video: {validation_msg}"
436
 
437
  def _prog(pct: float, desc: str):
@@ -441,7 +450,6 @@ def _prog(pct: float, desc: str):
441
  if progress_callback:
442
  progress_callback(pct, desc)
443
 
444
- # Update processing info file
445
  if "Frame" in desc and "|" in desc:
446
  parts = desc.split("|")
447
  frame_info = parts[0].strip() if len(parts) > 0 else ""
@@ -466,29 +474,30 @@ def _prog(pct: float, desc: str):
466
  try:
467
  _prog(0.0, f"Starting {'TWO-STAGE' if use_two_stage else 'SINGLE-STAGE'} processing on {DEVICE}...")
468
 
469
- # Check if two-stage mode is requested
470
  if use_two_stage:
471
  if not TWO_STAGE_AVAILABLE:
 
472
  return None, "Two-stage mode not available. Please add two_stage_processor.py file."
473
 
474
  if two_stage_processor is None:
 
475
  return None, "Two-stage processor not initialized. Please reload models."
476
 
477
  _prog(0.05, "Starting TWO-STAGE green screen processing...")
478
 
479
- # Get video dimensions
480
  cap = cv2.VideoCapture(video_path)
481
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
482
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
483
  cap.release()
484
 
485
- # Prepare background
486
  if background_choice == "custom" and custom_background_path:
487
  if not os.path.exists(custom_background_path):
 
488
  return None, f"Custom background not found: {custom_background_path}"
489
 
490
  background = cv2.imread(custom_background_path)
491
  if background is None:
 
492
  return None, "Could not read custom background image."
493
  background_name = "Custom Image"
494
  else:
@@ -497,12 +506,11 @@ def _prog(pct: float, desc: str):
497
  background = create_professional_background(bg_config, frame_width, frame_height)
498
  background_name = bg_config["name"]
499
  else:
 
500
  return None, f"Invalid background selection: {background_choice}"
501
 
502
- # Get chroma settings
503
  chroma_settings = CHROMA_PRESETS.get(chroma_preset, CHROMA_PRESETS['standard'])
504
 
505
- # Run two-stage pipeline
506
  timestamp = int(time.time())
507
  final_output = f"/tmp/twostage_final_{timestamp}.mp4"
508
 
@@ -515,42 +523,39 @@ def _prog(pct: float, desc: str):
515
  )
516
 
517
  if PROCESS_CANCELLED.is_set():
 
518
  return None, "Processing cancelled by user"
519
 
520
  if result is None:
 
521
  return None, message
522
 
523
- # Add audio back - FIXED VERSION
524
  _prog(0.9, "Adding audio...")
525
  final_with_audio = f"/tmp/twostage_audio_{timestamp}.mp4"
526
 
527
- # First, check if input video has audio
528
  audio_check_success = run_ffmpeg_command([
529
  'ffprobe', '-v', 'quiet', '-select_streams', 'a:0',
530
  '-show_entries', 'stream=codec_name', '-of', 'csv=p=0', video_path
531
  ], "Checking for audio stream")
532
 
533
  if audio_check_success:
534
- # Input has audio - copy it with video
535
  audio_success = run_ffmpeg_command([
536
  'ffmpeg', '-y', '-i', final_output, '-i', video_path,
537
- '-c:v', 'copy', # Copy video without re-encoding (faster)
538
  '-c:a', 'aac', '-b:a', '192k', '-ac', '2', '-ar', '48000',
539
- '-map', '0:v:0', '-map', '1:a:0', '-shortest', final_with_audio # Removed the '?'
540
  ], "Two-stage audio processing with original audio")
541
 
542
  if not audio_success or not os.path.exists(final_with_audio):
543
  logger.warning("Failed with original audio, trying fallback method...")
544
- # Fallback: try different mapping
545
  audio_success = run_ffmpeg_command([
546
  'ffmpeg', '-y', '-i', video_path, '-i', final_output,
547
  '-c:v', 'libx264', '-crf', '18', '-preset', 'fast',
548
- '-c:a', 'copy', # Copy audio without re-encoding
549
  '-map', '1:v:0', '-map', '0:a:0', '-shortest', final_with_audio
550
  ], "Fallback two-stage audio processing")
551
  else:
552
  logger.info("Input video has no audio stream")
553
- # No audio in source - just use processed video
554
  try:
555
  shutil.copy2(final_output, final_with_audio)
556
  audio_success = True
@@ -581,11 +586,11 @@ def _prog(pct: float, desc: str):
581
 
582
  return final_output, success_message
583
 
584
- # Single-stage processing
585
  _prog(0.05, f"Starting SINGLE-STAGE processing on {DEVICE}...")
586
 
587
  cap = cv2.VideoCapture(video_path)
588
  if not cap.isOpened():
 
589
  return None, "Could not open video file."
590
 
591
  fps = cap.get(cv2.CAP_PROP_FPS)
@@ -595,23 +600,24 @@ def _prog(pct: float, desc: str):
595
 
596
  if total_frames == 0:
597
  cap.release()
 
598
  return None, "Video appears to be empty."
599
 
600
- # Log video info
601
  logger.info(f"Video info: {frame_width}x{frame_height}, {fps}fps, {total_frames} frames, processing on {DEVICE}")
602
 
603
- # Prepare background
604
  background = None
605
  background_name = ""
606
 
607
  if background_choice == "custom" and custom_background_path:
608
  if not os.path.exists(custom_background_path):
609
  cap.release()
 
610
  return None, f"Custom background not found: {custom_background_path}"
611
 
612
  background = cv2.imread(custom_background_path)
613
  if background is None:
614
  cap.release()
 
615
  return None, "Could not read custom background image."
616
  background_name = "Custom Image"
617
  else:
@@ -621,36 +627,35 @@ def _prog(pct: float, desc: str):
621
  background_name = bg_config["name"]
622
  else:
623
  cap.release()
 
624
  return None, f"Invalid background selection: {background_choice}"
625
 
626
  if background is None:
627
  cap.release()
 
628
  return None, "Failed to create background."
629
 
630
  timestamp = int(time.time())
631
 
632
  _prog(0.1, f"Processing {total_frames} frames with {'TWO-STAGE' if use_two_stage else 'SINGLE-STAGE'} processing on {DEVICE}...")
633
 
634
- # FIXED: Ensure output_path is always defined
635
  if preview_mask or preview_greenscreen:
636
  output_path = f"/tmp/preview_{timestamp}.mp4"
637
  else:
638
  output_path = f"/tmp/output_{timestamp}.mp4"
639
 
640
- # ROBUST: Create video writer with codec fallback
641
  final_writer, actual_output_path = create_video_writer(output_path, fps, frame_width, frame_height)
642
  if final_writer is None:
643
  cap.release()
 
644
  return None, "Could not create output video file with any codec."
645
 
646
- # Update output_path to actual path (may have different extension)
647
  output_path = actual_output_path
648
 
649
  frame_count = 0
650
  successful_frames = 0
651
  last_refined_mask = None
652
 
653
- # Processing stats
654
  start_time = time.time()
655
 
656
  try:
@@ -662,13 +667,11 @@ def _prog(pct: float, desc: str):
662
  if not ret:
663
  break
664
 
665
- # Skip frames if FRAME_SKIP > 1
666
  if frame_count % FRAME_SKIP != 0:
667
  frame_count += 1
668
  continue
669
 
670
  try:
671
- # Update progress with detailed timing info and ETA
672
  elapsed_time = time.time() - start_time
673
  current_fps = frame_count / elapsed_time if elapsed_time > 0 else 0
674
  remaining_frames = total_frames - frame_count
@@ -677,37 +680,30 @@ def _prog(pct: float, desc: str):
677
 
678
  progress_msg = f"Frame {frame_count + 1}/{total_frames} | {elapsed_time:.1f}s | {current_fps:.1f} fps | ETA: {eta_display} | Device: {DEVICE}"
679
 
680
- # Log and display progress with clamped percentage
681
  logger.info(progress_msg)
682
  pct = min(1.0, 0.1 + (frame_count / max(1, total_frames)) * 0.8)
683
  _prog(pct, progress_msg)
684
 
685
- # CRITICAL: Use functions from utilities.py, not local implementations!
686
- # SAM2 segmentation using utilities function
687
  mask = segment_person_hq(frame, sam2_predictor)
688
 
689
  if preview_mask:
690
- # Save mask visualization - proper green color in BGR format
691
  mask_vis = np.zeros_like(frame)
692
- mask_vis[..., 1] = mask # Put mask in green channel (BGR format)
693
  final_writer.write(mask_vis.astype(np.uint8))
694
  frame_count += 1
695
  continue
696
 
697
- # MatAnyone refinement on keyframes using utilities function
698
  if (frame_count % KEYFRAME_INTERVAL == 0) or (last_refined_mask is None):
699
  refined_mask = refine_mask_hq(frame, mask, matanyone_model)
700
  last_refined_mask = refined_mask.copy()
701
  logger.info(f"Keyframe refinement at frame {frame_count} on {DEVICE}")
702
  else:
703
- # Blend SAM2 mask with last refined mask for temporal smoothness
704
  alpha = 0.7
705
  refined_mask = cv2.addWeighted(mask, alpha, last_refined_mask, 1-alpha, 0)
706
 
707
  if preview_greenscreen:
708
- # Create green screen preview
709
  green_bg = np.zeros_like(frame)
710
- green_bg[:, :] = [0, 255, 0] # Pure green
711
  preview_frame = frame.copy()
712
  mask_3ch = cv2.cvtColor(refined_mask, cv2.COLOR_GRAY2BGR)
713
  mask_norm = mask_3ch.astype(float) / 255
@@ -716,22 +712,19 @@ def _prog(pct: float, desc: str):
716
  frame_count += 1
717
  continue
718
 
719
- # CRITICAL: Use replace_background_hq from utilities which has the transparency fix!
720
  result_frame = replace_background_hq(frame, refined_mask, background)
721
  final_writer.write(result_frame.astype(np.uint8))
722
  successful_frames += 1
723
 
724
  except Exception as frame_error:
725
  logger.warning(f"Error processing frame {frame_count}: {frame_error}")
726
- # Write original frame if processing fails
727
  final_writer.write(frame)
728
 
729
  frame_count += 1
730
 
731
- # Memory management
732
  if frame_count % MEMORY_CLEANUP_INTERVAL == 0:
733
  gc.collect()
734
- if DEVICE.type == 'cuda': # Use consistent device checking
735
  torch.cuda.empty_cache()
736
  elapsed = time.time() - start_time
737
  fps_actual = frame_count / elapsed
@@ -739,7 +732,6 @@ def _prog(pct: float, desc: str):
739
  logger.info(f"Progress: {frame_count}/{total_frames}, FPS: {fps_actual:.1f}, ETA: {eta:.0f}s, Device: {DEVICE}")
740
 
741
  finally:
742
- # ALWAYS cleanup resources
743
  cap.release()
744
  final_writer.release()
745
 
@@ -750,12 +742,13 @@ def _prog(pct: float, desc: str):
750
  os.remove(output_path)
751
  except:
752
  pass
 
753
  return None, "Processing cancelled by user"
754
 
755
  if successful_frames == 0:
 
756
  return None, "No frames were processed successfully with AI."
757
 
758
- # Calculate processing stats
759
  total_time = time.time() - start_time
760
  avg_fps = frame_count / total_time if total_time > 0 else 0
761
 
@@ -764,37 +757,32 @@ def _prog(pct: float, desc: str):
764
  if preview_mask or preview_greenscreen:
765
  final_output = output_path
766
  else:
767
- # Add audio back for final output - FIXED VERSION
768
  _prog(0.9, "Adding audio...")
769
  final_output = f"/tmp/final_{timestamp}.mp4"
770
 
771
- # First, check if input video has audio
772
  audio_check_success = run_ffmpeg_command([
773
  'ffprobe', '-v', 'quiet', '-select_streams', 'a:0',
774
  '-show_entries', 'stream=codec_name', '-of', 'csv=p=0', video_path
775
  ], "Checking for audio stream")
776
 
777
  if audio_check_success:
778
- # Input has audio - copy it with video
779
  audio_success = run_ffmpeg_command([
780
  'ffmpeg', '-y', '-i', output_path, '-i', video_path,
781
- '-c:v', 'copy', # Copy video without re-encoding (faster)
782
  '-c:a', 'aac', '-b:a', '192k', '-ac', '2', '-ar', '48000',
783
- '-map', '0:v:0', '-map', '1:a:0', '-shortest', final_output # Removed the '?'
784
  ], "Audio processing with original audio")
785
 
786
  if not audio_success or not os.path.exists(final_output):
787
  logger.warning("Failed with original audio, trying fallback method...")
788
- # Fallback: try different mapping
789
  audio_success = run_ffmpeg_command([
790
  'ffmpeg', '-y', '-i', video_path, '-i', output_path,
791
  '-c:v', 'libx264', '-crf', '18', '-preset', 'fast',
792
- '-c:a', 'copy', # Copy audio without re-encoding
793
  '-map', '1:v:0', '-map', '0:a:0', '-shortest', final_output
794
  ], "Fallback audio processing")
795
  else:
796
  logger.info("Input video has no audio stream")
797
- # No audio in source - just use processed video
798
  try:
799
  shutil.copy2(output_path, final_output)
800
  audio_success = True
@@ -811,7 +799,6 @@ def _prog(pct: float, desc: str):
811
  logger.error(f"Failed to copy video: {e}")
812
  final_output = output_path
813
 
814
- # Cleanup intermediate file
815
  try:
816
  if os.path.exists(output_path) and output_path != final_output:
817
  os.remove(output_path)
@@ -851,28 +838,46 @@ def main():
851
  print(f"Device: {DEVICE}")
852
  print("Loading UI components...")
853
 
854
- # Import UI components
855
- from ui_components import create_interface
 
 
 
 
 
856
 
857
  os.makedirs("/tmp/MyAvatar/My_Videos/", exist_ok=True)
858
  CACHE_DIR.mkdir(exist_ok=True, parents=True)
859
 
860
  print("Creating interface...")
861
- demo = create_interface()
 
 
 
 
 
 
862
 
863
  print("Launching...")
864
- # Fixed for HF Spaces - share=True is required when localhost not accessible
865
- demo.queue().launch(
866
- server_name="0.0.0.0",
867
- server_port=7860,
868
- share=True, # Required for HF Spaces
869
- show_error=True,
870
- debug=True
871
- )
 
 
 
 
 
872
 
873
  except Exception as e:
874
  logger.error(f"Startup failed: {e}")
 
875
  print(f"Startup failed: {e}")
 
876
 
877
  if __name__ == "__main__":
878
  main()
 
7
  FIXED: All issues identified by Grok4 - robust error handling, variable scope, codec fallbacks
8
  FIXED: Added SSR mode disable for Gradio compatibility
9
  FIXED: Audio preservation - no more missing audio in processed videos
10
+ UPDATE: Enhanced logging for initialization errors, isolated matanyone imports to avoid GUI crashes
11
  """
12
  import cv2
13
  import numpy as np
 
50
 
51
  except (ImportError, AttributeError) as e:
52
  print(f"❌ CRITICAL: Gradio patch failed: {e}")
53
+ logger.error(f"Gradio patch failed: {e}")
54
 
55
  # Import utilities - CRITICAL: Use these functions, don't duplicate!
56
  from utilities import (
 
67
  try:
68
  from two_stage_processor import TwoStageProcessor, CHROMA_PRESETS
69
  TWO_STAGE_AVAILABLE = True
70
+ logger.info("Two-stage processor available")
71
+ except ImportError as e:
72
  TWO_STAGE_AVAILABLE = False
73
+ logger.warning(f"Two-stage processor not available: {e}")
74
 
75
  logging.basicConfig(level=logging.INFO)
76
  logger = logging.getLogger(__name__)
 
105
  """Automatically detect the best available device (CPU or GPU) with robust error handling"""
106
  try:
107
  if torch.cuda.is_available():
 
108
  try:
109
  device_name = torch.cuda.get_device_name(0)
110
  logger.info(f"Using GPU: {device_name}")
 
112
  logger.warning(f"Could not get GPU name: {e}, but CUDA is available")
113
  device_name = "CUDA GPU"
114
 
 
115
  try:
116
  test_tensor = torch.tensor([1.0], device='cuda')
117
  del test_tensor
 
171
  for fourcc_str, ext in codecs_to_try:
172
  try:
173
  fourcc = cv2.VideoWriter_fourcc(*fourcc_str)
 
174
  if not output_path.endswith(ext):
175
  base = os.path.splitext(output_path)[0]
176
  test_path = base + ext
 
198
  if progress_callback:
199
  progress_callback(pct, desc)
200
 
 
201
  if "Frame" in desc and "|" in desc:
202
  parts = desc.split("|")
203
  frame_info = parts[0].strip() if len(parts) > 0 else ""
 
221
  try:
222
  _prog(0.1, "Initializing SAM2...")
223
 
 
224
  hf_token = os.getenv('HF_TOKEN')
225
  if not hf_token:
226
  logger.warning("No HF_TOKEN found, downloads may be rate limited")
227
 
 
 
228
  try:
229
  checkpoint_path = hf_hub_download(
230
+ repo_id="facebook/sam2-hiera-large",
231
  filename="sam2_hiera_large.pt",
232
  cache_dir=str(CACHE_DIR / "sam2_checkpoint"),
233
  force_download=False,
234
  token=hf_token
235
  )
236
+ logger.info(f"SAM2 checkpoint downloaded to {checkpoint_path}")
237
  except Exception as e:
238
  logger.error(f"Failed to download SAM2 checkpoint: {e}")
239
  raise Exception(f"SAM2 checkpoint download failed: {e}")
240
 
 
241
  try:
242
  from sam2.build_sam import build_sam2
243
  from sam2.sam2_image_predictor import SAM2ImagePredictor
244
+ logger.info("SAM2 modules imported successfully")
245
  except ImportError as e:
246
+ logger.error(f"SAM2 import failed: {e}")
247
  raise Exception(f"SAM2 import failed: {e}. Make sure SAM2 is properly installed.")
248
 
 
249
  try:
250
  sam2_model = build_sam2("sam2_hiera_l.yaml", checkpoint_path)
251
  sam2_model.to(device)
252
+ sam2_model.eval()
253
  predictor = SAM2ImagePredictor(sam2_model)
254
+ logger.info(f"SAM2 model built and moved to {device}")
255
  except Exception as e:
256
+ logger.error(f"SAM2 model creation failed: {e}")
257
  raise Exception(f"SAM2 model creation failed: {e}")
258
 
 
259
  _prog(0.8, "Testing SAM2 functionality...")
260
  test_image = np.zeros((256, 256, 3), dtype=np.uint8)
261
  predictor.set_image(test_image)
262
 
263
+ test_points = np.array([[128.0, 128.0]], dtype=np.float32)
264
+ test_labels = np.array([1], dtype=np.int32)
 
265
 
266
  try:
267
+ with torch.no_grad():
268
  masks, scores, _ = predictor.predict(
269
  point_coords=test_points,
270
  point_labels=test_labels,
271
  multimask_output=False
272
  )
273
  except Exception as e:
274
+ logger.error(f"SAM2 prediction test failed: {e}")
275
  raise Exception(f"SAM2 prediction test failed: {e}")
276
 
277
  if masks is None or len(masks) == 0:
278
+ logger.error("SAM2 predictor test failed - no masks generated")
279
  raise Exception("SAM2 predictor test failed - no masks generated")
280
 
281
  _prog(1.0, "SAM2 loaded and validated successfully!")
 
301
 
302
  try:
303
  from matanyone import InferenceCore
304
+ logger.info("Successfully imported MatAnyone InferenceCore")
305
  except ImportError as e:
306
+ logger.error(f"MatAnyone import failed: {e}")
307
  raise Exception(f"MatAnyone import failed: {e}. Make sure MatAnyone is properly installed.")
308
 
309
  try:
310
  processor = InferenceCore("PeiqingYang/MatAnyone")
311
+ logger.info("MatAnyone InferenceCore initialized")
312
  except Exception as e:
313
+ logger.error(f"MatAnyone model loading failed: {e}")
314
  raise Exception(f"MatAnyone model loading failed: {e}")
315
 
 
316
  _prog(0.8, "Testing MatAnyone functionality...")
317
  test_image = np.zeros((256, 256, 3), dtype=np.uint8)
318
  test_mask = np.zeros((256, 256), dtype=np.uint8)
319
  test_mask[64:192, 64:192] = 255
320
 
 
321
  try:
322
  if hasattr(processor, 'process') or hasattr(processor, '__call__'):
323
  logger.info("MatAnyone processor interface detected")
 
364
 
365
  with loading_lock:
366
  if models_loaded and not PROCESS_CANCELLED.is_set():
367
+ logger.info("Models already loaded and validated")
368
  return "Models already loaded and validated"
369
 
370
  try:
 
375
  if progress_callback:
376
  progress_callback(0.0, f"Starting model loading on {DEVICE}...")
377
 
 
378
  sam2_predictor = load_sam2_predictor_fixed(device=DEVICE, progress_callback=progress_callback)
379
 
380
  if PROCESS_CANCELLED.is_set():
381
+ logger.info("Model loading cancelled by user")
382
  return "Model loading cancelled by user"
383
 
 
384
  matanyone_model = load_matanyone_fixed(progress_callback=progress_callback)
385
 
386
  if PROCESS_CANCELLED.is_set():
387
+ logger.info("Model loading cancelled by user")
388
  return "Model loading cancelled by user"
389
 
390
  models_loaded = True
391
 
 
392
  if TWO_STAGE_AVAILABLE:
393
+ try:
394
+ two_stage_processor = TwoStageProcessor(sam2_predictor, matanyone_model)
395
+ logger.info("Two-stage processor initialized")
396
+ except Exception as e:
397
+ logger.warning(f"Two-stage processor initialization failed: {e}")
398
+ TWO_STAGE_AVAILABLE = False
399
 
400
  load_time = time.time() - start_time
401
  message = f"SUCCESS: SAM2 + MatAnyone loaded and validated in {load_time:.1f}s on {DEVICE}"
 
427
  global PROCESS_CANCELLED
428
 
429
  if PROCESS_CANCELLED.is_set():
430
+ logger.info("Processing cancelled by user")
431
  return None, "Processing cancelled by user"
432
 
433
  if not models_loaded:
434
+ logger.error("Models not loaded")
435
  return None, "Models not loaded. Call load_models_with_validation() first."
436
 
437
  if not video_path or not os.path.exists(video_path):
438
+ logger.error(f"Video file not found: {video_path}")
439
  return None, f"Video file not found: {video_path}"
440
 
 
441
  is_valid, validation_msg = validate_video_file(video_path)
442
  if not is_valid:
443
+ logger.error(f"Invalid video: {validation_msg}")
444
  return None, f"Invalid video: {validation_msg}"
445
 
446
  def _prog(pct: float, desc: str):
 
450
  if progress_callback:
451
  progress_callback(pct, desc)
452
 
 
453
  if "Frame" in desc and "|" in desc:
454
  parts = desc.split("|")
455
  frame_info = parts[0].strip() if len(parts) > 0 else ""
 
474
  try:
475
  _prog(0.0, f"Starting {'TWO-STAGE' if use_two_stage else 'SINGLE-STAGE'} processing on {DEVICE}...")
476
 
 
477
  if use_two_stage:
478
  if not TWO_STAGE_AVAILABLE:
479
+ logger.error("Two-stage mode not available")
480
  return None, "Two-stage mode not available. Please add two_stage_processor.py file."
481
 
482
  if two_stage_processor is None:
483
+ logger.error("Two-stage processor not initialized")
484
  return None, "Two-stage processor not initialized. Please reload models."
485
 
486
  _prog(0.05, "Starting TWO-STAGE green screen processing...")
487
 
 
488
  cap = cv2.VideoCapture(video_path)
489
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
490
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
491
  cap.release()
492
 
 
493
  if background_choice == "custom" and custom_background_path:
494
  if not os.path.exists(custom_background_path):
495
+ logger.error(f"Custom background not found: {custom_background_path}")
496
  return None, f"Custom background not found: {custom_background_path}"
497
 
498
  background = cv2.imread(custom_background_path)
499
  if background is None:
500
+ logger.error("Could not read custom background image")
501
  return None, "Could not read custom background image."
502
  background_name = "Custom Image"
503
  else:
 
506
  background = create_professional_background(bg_config, frame_width, frame_height)
507
  background_name = bg_config["name"]
508
  else:
509
+ logger.error(f"Invalid background selection: {background_choice}")
510
  return None, f"Invalid background selection: {background_choice}"
511
 
 
512
  chroma_settings = CHROMA_PRESETS.get(chroma_preset, CHROMA_PRESETS['standard'])
513
 
 
514
  timestamp = int(time.time())
515
  final_output = f"/tmp/twostage_final_{timestamp}.mp4"
516
 
 
523
  )
524
 
525
  if PROCESS_CANCELLED.is_set():
526
+ logger.info("Processing cancelled by user")
527
  return None, "Processing cancelled by user"
528
 
529
  if result is None:
530
+ logger.error(f"Two-stage processing failed: {message}")
531
  return None, message
532
 
 
533
  _prog(0.9, "Adding audio...")
534
  final_with_audio = f"/tmp/twostage_audio_{timestamp}.mp4"
535
 
 
536
  audio_check_success = run_ffmpeg_command([
537
  'ffprobe', '-v', 'quiet', '-select_streams', 'a:0',
538
  '-show_entries', 'stream=codec_name', '-of', 'csv=p=0', video_path
539
  ], "Checking for audio stream")
540
 
541
  if audio_check_success:
 
542
  audio_success = run_ffmpeg_command([
543
  'ffmpeg', '-y', '-i', final_output, '-i', video_path,
544
+ '-c:v', 'copy',
545
  '-c:a', 'aac', '-b:a', '192k', '-ac', '2', '-ar', '48000',
546
+ '-map', '0:v:0', '-map', '1:a:0', '-shortest', final_with_audio
547
  ], "Two-stage audio processing with original audio")
548
 
549
  if not audio_success or not os.path.exists(final_with_audio):
550
  logger.warning("Failed with original audio, trying fallback method...")
 
551
  audio_success = run_ffmpeg_command([
552
  'ffmpeg', '-y', '-i', video_path, '-i', final_output,
553
  '-c:v', 'libx264', '-crf', '18', '-preset', 'fast',
554
+ '-c:a', 'copy',
555
  '-map', '1:v:0', '-map', '0:a:0', '-shortest', final_with_audio
556
  ], "Fallback two-stage audio processing")
557
  else:
558
  logger.info("Input video has no audio stream")
 
559
  try:
560
  shutil.copy2(final_output, final_with_audio)
561
  audio_success = True
 
586
 
587
  return final_output, success_message
588
 
 
589
  _prog(0.05, f"Starting SINGLE-STAGE processing on {DEVICE}...")
590
 
591
  cap = cv2.VideoCapture(video_path)
592
  if not cap.isOpened():
593
+ logger.error("Could not open video file")
594
  return None, "Could not open video file."
595
 
596
  fps = cap.get(cv2.CAP_PROP_FPS)
 
600
 
601
  if total_frames == 0:
602
  cap.release()
603
+ logger.error("Video appears to be empty")
604
  return None, "Video appears to be empty."
605
 
 
606
  logger.info(f"Video info: {frame_width}x{frame_height}, {fps}fps, {total_frames} frames, processing on {DEVICE}")
607
 
 
608
  background = None
609
  background_name = ""
610
 
611
  if background_choice == "custom" and custom_background_path:
612
  if not os.path.exists(custom_background_path):
613
  cap.release()
614
+ logger.error(f"Custom background not found: {custom_background_path}")
615
  return None, f"Custom background not found: {custom_background_path}"
616
 
617
  background = cv2.imread(custom_background_path)
618
  if background is None:
619
  cap.release()
620
+ logger.error("Could not read custom background image")
621
  return None, "Could not read custom background image."
622
  background_name = "Custom Image"
623
  else:
 
627
  background_name = bg_config["name"]
628
  else:
629
  cap.release()
630
+ logger.error(f"Invalid background selection: {background_choice}")
631
  return None, f"Invalid background selection: {background_choice}"
632
 
633
  if background is None:
634
  cap.release()
635
+ logger.error("Failed to create background")
636
  return None, "Failed to create background."
637
 
638
  timestamp = int(time.time())
639
 
640
  _prog(0.1, f"Processing {total_frames} frames with {'TWO-STAGE' if use_two_stage else 'SINGLE-STAGE'} processing on {DEVICE}...")
641
 
 
642
  if preview_mask or preview_greenscreen:
643
  output_path = f"/tmp/preview_{timestamp}.mp4"
644
  else:
645
  output_path = f"/tmp/output_{timestamp}.mp4"
646
 
 
647
  final_writer, actual_output_path = create_video_writer(output_path, fps, frame_width, frame_height)
648
  if final_writer is None:
649
  cap.release()
650
+ logger.error("Could not create output video file with any codec")
651
  return None, "Could not create output video file with any codec."
652
 
 
653
  output_path = actual_output_path
654
 
655
  frame_count = 0
656
  successful_frames = 0
657
  last_refined_mask = None
658
 
 
659
  start_time = time.time()
660
 
661
  try:
 
667
  if not ret:
668
  break
669
 
 
670
  if frame_count % FRAME_SKIP != 0:
671
  frame_count += 1
672
  continue
673
 
674
  try:
 
675
  elapsed_time = time.time() - start_time
676
  current_fps = frame_count / elapsed_time if elapsed_time > 0 else 0
677
  remaining_frames = total_frames - frame_count
 
680
 
681
  progress_msg = f"Frame {frame_count + 1}/{total_frames} | {elapsed_time:.1f}s | {current_fps:.1f} fps | ETA: {eta_display} | Device: {DEVICE}"
682
 
 
683
  logger.info(progress_msg)
684
  pct = min(1.0, 0.1 + (frame_count / max(1, total_frames)) * 0.8)
685
  _prog(pct, progress_msg)
686
 
 
 
687
  mask = segment_person_hq(frame, sam2_predictor)
688
 
689
  if preview_mask:
 
690
  mask_vis = np.zeros_like(frame)
691
+ mask_vis[..., 1] = mask
692
  final_writer.write(mask_vis.astype(np.uint8))
693
  frame_count += 1
694
  continue
695
 
 
696
  if (frame_count % KEYFRAME_INTERVAL == 0) or (last_refined_mask is None):
697
  refined_mask = refine_mask_hq(frame, mask, matanyone_model)
698
  last_refined_mask = refined_mask.copy()
699
  logger.info(f"Keyframe refinement at frame {frame_count} on {DEVICE}")
700
  else:
 
701
  alpha = 0.7
702
  refined_mask = cv2.addWeighted(mask, alpha, last_refined_mask, 1-alpha, 0)
703
 
704
  if preview_greenscreen:
 
705
  green_bg = np.zeros_like(frame)
706
+ green_bg[:, :] = [0, 255, 0]
707
  preview_frame = frame.copy()
708
  mask_3ch = cv2.cvtColor(refined_mask, cv2.COLOR_GRAY2BGR)
709
  mask_norm = mask_3ch.astype(float) / 255
 
712
  frame_count += 1
713
  continue
714
 
 
715
  result_frame = replace_background_hq(frame, refined_mask, background)
716
  final_writer.write(result_frame.astype(np.uint8))
717
  successful_frames += 1
718
 
719
  except Exception as frame_error:
720
  logger.warning(f"Error processing frame {frame_count}: {frame_error}")
 
721
  final_writer.write(frame)
722
 
723
  frame_count += 1
724
 
 
725
  if frame_count % MEMORY_CLEANUP_INTERVAL == 0:
726
  gc.collect()
727
+ if DEVICE.type == 'cuda':
728
  torch.cuda.empty_cache()
729
  elapsed = time.time() - start_time
730
  fps_actual = frame_count / elapsed
 
732
  logger.info(f"Progress: {frame_count}/{total_frames}, FPS: {fps_actual:.1f}, ETA: {eta:.0f}s, Device: {DEVICE}")
733
 
734
  finally:
 
735
  cap.release()
736
  final_writer.release()
737
 
 
742
  os.remove(output_path)
743
  except:
744
  pass
745
+ logger.info("Processing cancelled by user")
746
  return None, "Processing cancelled by user"
747
 
748
  if successful_frames == 0:
749
+ logger.error("No frames were processed successfully with AI")
750
  return None, "No frames were processed successfully with AI."
751
 
 
752
  total_time = time.time() - start_time
753
  avg_fps = frame_count / total_time if total_time > 0 else 0
754
 
 
757
  if preview_mask or preview_greenscreen:
758
  final_output = output_path
759
  else:
 
760
  _prog(0.9, "Adding audio...")
761
  final_output = f"/tmp/final_{timestamp}.mp4"
762
 
 
763
  audio_check_success = run_ffmpeg_command([
764
  'ffprobe', '-v', 'quiet', '-select_streams', 'a:0',
765
  '-show_entries', 'stream=codec_name', '-of', 'csv=p=0', video_path
766
  ], "Checking for audio stream")
767
 
768
  if audio_check_success:
 
769
  audio_success = run_ffmpeg_command([
770
  'ffmpeg', '-y', '-i', output_path, '-i', video_path,
771
+ '-c:v', 'copy',
772
  '-c:a', 'aac', '-b:a', '192k', '-ac', '2', '-ar', '48000',
773
+ '-map', '0:v:0', '-map', '1:a:0', '-shortest', final_output
774
  ], "Audio processing with original audio")
775
 
776
  if not audio_success or not os.path.exists(final_output):
777
  logger.warning("Failed with original audio, trying fallback method...")
 
778
  audio_success = run_ffmpeg_command([
779
  'ffmpeg', '-y', '-i', video_path, '-i', output_path,
780
  '-c:v', 'libx264', '-crf', '18', '-preset', 'fast',
781
+ '-c:a', 'copy',
782
  '-map', '1:v:0', '-map', '0:a:0', '-shortest', final_output
783
  ], "Fallback audio processing")
784
  else:
785
  logger.info("Input video has no audio stream")
 
786
  try:
787
  shutil.copy2(output_path, final_output)
788
  audio_success = True
 
799
  logger.error(f"Failed to copy video: {e}")
800
  final_output = output_path
801
 
 
802
  try:
803
  if os.path.exists(output_path) and output_path != final_output:
804
  os.remove(output_path)
 
838
  print(f"Device: {DEVICE}")
839
  print("Loading UI components...")
840
 
841
+ try:
842
+ from ui_components import create_interface
843
+ logger.info("Successfully imported ui_components")
844
+ except ImportError as e:
845
+ logger.error(f"Failed to import ui_components: {e}")
846
+ logger.error(f"Full traceback: {traceback.format_exc()}")
847
+ raise Exception(f"UI components import failed: {e}")
848
 
849
  os.makedirs("/tmp/MyAvatar/My_Videos/", exist_ok=True)
850
  CACHE_DIR.mkdir(exist_ok=True, parents=True)
851
 
852
  print("Creating interface...")
853
+ try:
854
+ demo = create_interface()
855
+ logger.info("Gradio interface created successfully")
856
+ except Exception as e:
857
+ logger.error(f"Failed to create Gradio interface: {e}")
858
+ logger.error(f"Full traceback: {traceback.format_exc()}")
859
+ raise Exception(f"Gradio interface creation failed: {e}")
860
 
861
  print("Launching...")
862
+ try:
863
+ demo.queue().launch(
864
+ server_name="0.0.0.0",
865
+ server_port=7860,
866
+ share=True,
867
+ show_error=True,
868
+ debug=True
869
+ )
870
+ logger.info("Gradio server launched successfully")
871
+ except Exception as e:
872
+ logger.error(f"Gradio launch failed: {e}")
873
+ logger.error(f"Full traceback: {traceback.format_exc()}")
874
+ raise Exception(f"Gradio launch failed: {e}")
875
 
876
  except Exception as e:
877
  logger.error(f"Startup failed: {e}")
878
+ logger.error(f"Full traceback: {traceback.format_exc()}")
879
  print(f"Startup failed: {e}")
880
+ raise
881
 
882
  # Script entry point: call main() only when this file is executed directly,
  # not when it is imported as a module.
  if __name__ == "__main__":
883
  main()