MogensR committed on
Commit
789ff6d
·
verified ·
1 Parent(s): ad94470

Update processing/two_stage/two_stage_processor.py

Browse files
processing/two_stage/two_stage_processor.py CHANGED
@@ -5,10 +5,13 @@
5
  Stage 1: SAM2 creates reference mask from first 3 frames
6
  Stage 2: MatAnyone processes entire video → pha.mp4
7
  Stage 3: Create green screen video using pha.mp4 as alpha matte
8
- Stage 4: Composite green screen video with background → final.mp4
9
- NEW: Auto-detection of optimal chroma key threshold
10
- NEW: Chunked processing for long videos with memory management
11
- FIXED: Memory test disabled to prevent predictor deletion
 
 
 
12
  """
13
 
14
  # ==============================================================================
@@ -76,9 +79,14 @@ class ProcessingConfig:
76
  # Auto-detection settings
77
  AUTO_DETECTION_FRAMES = 5 # Number of frames to analyze for auto-detection
78
  AUTO_DETECTION_FALLBACK = 0.1 # Fallback threshold if auto-detection fails
79
- MIN_THRESHOLD = 0.05 # Minimum allowed threshold
80
  MAX_THRESHOLD = 0.3 # Maximum allowed threshold
81
 
 
 
 
 
 
82
  # Quality settings
83
  VIDEO_CODEC = 'libx264'
84
  VIDEO_BITRATE = '8000k'
@@ -514,10 +522,10 @@ class TwoStageProcessor:
514
  1. SAM2 creates reference mask from first few frames
515
  2. MatAnyone processes entire video using reference mask → pha.mp4
516
  3. Create green screen video using pha.mp4 as alpha matte
517
- 4. Composite green screen video with background → final.mp4
518
- 5. Auto-detects optimal chroma key threshold
519
  6. Chunked processing for long videos with memory management
520
- 7. FIXED: Memory test disabled to prevent predictor deletion
521
  """
522
 
523
  def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
@@ -709,11 +717,11 @@ def _process_single_video(self,
709
  if self.memory_manager.should_clear_memory():
710
  self.memory_manager.cleanup_stage("GreenScreen")
711
 
712
- # Stage 4: Final compositing
713
  if callback:
714
- callback("Final compositing with background...", 90)
715
- logger.info("STAGE 4: Final compositing with background...")
716
- final_path = self._stage4_final_compositing(green_screen_path, background_path, output_path, quality)
717
 
718
  # Final memory cleanup
719
  self.memory_manager.cleanup_stage("Final")
@@ -982,7 +990,7 @@ def _trim_video(self, input_path: str, output_path: str, seconds: int):
982
  raise
983
 
984
  # ==============================================================================
985
- # CHAPTER 10: STAGE 3 - GREEN SCREEN VIDEO CREATION
986
  # ==============================================================================
987
 
988
  @MemoryTester.monitor_memory_during_processing
@@ -1096,54 +1104,55 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
1096
  raise
1097
 
1098
  # ==============================================================================
1099
- # CHAPTER 11: STAGE 4 - FINAL COMPOSITING WITH AUTO-DETECTION
1100
  # ==============================================================================
1101
 
1102
- @MemoryTester.monitor_memory_during_processing
1103
- def _stage4_final_compositing(self, green_screen_path: str, background_path: str, output_path: str, quality: str) -> str:
 
1104
  """
1105
- Stage 4: Final compositing of green screen video with background.
1106
 
1107
- Uses MoviePy for high-quality chroma key compositing with auto-detected threshold.
1108
  """
1109
  try:
1110
  # Get quality profile
1111
  profile = QualityManager.get_profile(quality)
1112
 
1113
- # Load green screen video
1114
  green_clip = VideoFileClip(green_screen_path)
1115
 
1116
- # Load and prepare background
1117
  if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
1118
- # Video background
1119
  bg_clip = VideoFileClip(background_path)
1120
- # Loop background if shorter than green screen video
1121
  if bg_clip.duration < green_clip.duration:
1122
  bg_clip = bg_clip.loop(duration=green_clip.duration)
1123
- # Trim background if longer
1124
  bg_clip = bg_clip.subclip(0, green_clip.duration)
1125
  else:
1126
- # Image background
1127
  bg_clip = ImageClip(background_path, duration=green_clip.duration)
1128
 
1129
- # Resize background to match green screen video
1130
  bg_clip = bg_clip.resize((green_clip.w, green_clip.h))
1131
 
1132
- # Auto-detect optimal chroma key threshold
1133
- logger.info("Auto-detecting optimal chroma key threshold...")
1134
- optimal_threshold = self._auto_detect_chroma_threshold(green_screen_path)
1135
- logger.info(f"Auto-detected threshold: {optimal_threshold}")
 
 
 
 
 
 
 
1136
 
1137
- # Apply chroma key with auto-detected threshold
1138
  green_screen_keyed = green_clip.fx(vfx.mask_color,
1139
  color=self.config.GREEN_COLOR_NORMALIZED,
1140
- thr=optimal_threshold, # Use auto-detected value
1141
- s=0.2) # Increased smoothing
1142
 
1143
- # Composite layers
1144
  final_clip = CompositeVideoClip([bg_clip, green_screen_keyed])
1145
 
1146
- # Write final video
1147
  write_params = {
1148
  'codec': self.config.VIDEO_CODEC,
1149
  'bitrate': profile['bitrate'],
@@ -1152,7 +1161,6 @@ def _stage4_final_compositing(self, green_screen_path: str, background_path: str
1152
  'logger': None
1153
  }
1154
 
1155
- # Add CRF if specified
1156
  if 'crf' in profile:
1157
  write_params['ffmpeg_params'] = ['-crf', str(profile['crf'])]
1158
 
@@ -1160,21 +1168,182 @@ def _stage4_final_compositing(self, green_screen_path: str, background_path: str
1160
 
1161
  # Cleanup
1162
  green_clip.close()
1163
- bg_clip.close()
1164
  final_clip.close()
1165
 
1166
- # Verify output
1167
  if not os.path.exists(output_path):
1168
  raise RuntimeError("Final output file was not created")
1169
-
1170
- file_size = os.path.getsize(output_path) / (1024 * 1024) # MB
1171
- logger.info(f"✅ Final compositing completed: {output_path} ({file_size:.1f} MB)")
1172
 
1173
  return output_path
1174
 
1175
  except Exception as e:
1176
- logger.error(f"Stage 4 failed: {e}")
1177
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1178
 
1179
  # ==============================================================================
1180
  # CHAPTER 12: AUTO CHROMA KEY THRESHOLD DETECTION
@@ -1182,16 +1351,16 @@ def _stage4_final_compositing(self, green_screen_path: str, background_path: str
1182
 
1183
  def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
1184
  """
1185
- Auto-detect optimal chroma key threshold by analyzing green screen video.
1186
 
1187
  This method:
1188
  1. Samples frames from the green screen video
1189
  2. Identifies green background pixels vs person pixels
1190
  3. Calculates color distance between greenest background and person
1191
- 4. Sets threshold as 80% of that distance for safety margin
1192
  """
1193
  try:
1194
- logger.info("Analyzing green screen video for auto-threshold detection...")
1195
 
1196
  # Open green screen video
1197
  cap = cv2.VideoCapture(green_screen_path)
@@ -1201,7 +1370,6 @@ def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
1201
 
1202
  # Get video properties
1203
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
1204
- fps = cap.get(cv2.CAP_PROP_FPS)
1205
 
1206
  # Sample frames evenly across the video
1207
  frame_indices = np.linspace(0, total_frames - 1,
@@ -1239,13 +1407,13 @@ def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
1239
 
1240
  logger.info(f"Analyzed {len(green_pixels)} green pixels and {len(person_pixels)} person pixels")
1241
 
1242
- # Calculate optimal threshold
1243
- threshold = self._calculate_optimal_threshold(green_pixels, person_pixels)
1244
 
1245
  # Clamp to safe range
1246
  threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
1247
 
1248
- logger.info(f"Calculated threshold: {threshold:.3f}")
1249
  return threshold
1250
 
1251
  except Exception as e:
@@ -1277,12 +1445,9 @@ def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
1277
  )
1278
 
1279
  # Sample pixels (subsample for performance)
1280
- h, w = frame_rgb.shape[:2]
1281
-
1282
  # Sample green pixels
1283
  green_coords = np.where(green_mask)
1284
  if len(green_coords[0]) > 1000:
1285
- # Subsample if too many
1286
  indices = np.random.choice(len(green_coords[0]), 1000, replace=False)
1287
  green_coords = (green_coords[0][indices], green_coords[1][indices])
1288
 
@@ -1291,7 +1456,6 @@ def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
1291
  # Sample person pixels
1292
  person_coords = np.where(person_mask)
1293
  if len(person_coords[0]) > 1000:
1294
- # Subsample if too many
1295
  indices = np.random.choice(len(person_coords[0]), 1000, replace=False)
1296
  person_coords = (person_coords[0][indices], person_coords[1][indices])
1297
 
@@ -1303,9 +1467,9 @@ def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
1303
  logger.warning(f"Frame color analysis failed: {e}")
1304
  return [], []
1305
 
1306
- def _calculate_optimal_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
1307
  """
1308
- Calculate optimal threshold based on color analysis.
1309
 
1310
  Strategy:
1311
  1. Find the "least green" green pixels (edge of green screen)
@@ -1320,7 +1484,6 @@ def _calculate_optimal_threshold(self, green_pixels: np.ndarray, person_pixels:
1320
  person_lab = self._rgb_to_lab_batch(person_pixels)
1321
 
1322
  # Use clustering to find representative colors
1323
- # Find the least green of the green pixels
1324
  kmeans_green = KMeans(n_clusters=min(5, len(green_lab)//10), random_state=42, n_init=10)
1325
  green_clusters = kmeans_green.fit_predict(green_lab)
1326
  green_centers = kmeans_green.cluster_centers_
@@ -1343,11 +1506,10 @@ def _calculate_optimal_threshold(self, green_pixels: np.ndarray, person_pixels:
1343
  color_distance = np.linalg.norm(main_green_cluster - closest_person_cluster)
1344
 
1345
  # Convert LAB distance to threshold
1346
- # LAB distances typically range 0-100, we want threshold 0-1
1347
- # Use 60% of the distance as threshold for safety margin
1348
- threshold = (color_distance / 100.0) * 0.6
1349
 
1350
- logger.info(f"Color distance analysis: LAB distance={color_distance:.2f}, threshold={threshold:.3f}")
1351
 
1352
  return threshold
1353
  else:
@@ -1356,7 +1518,6 @@ def _calculate_optimal_threshold(self, green_pixels: np.ndarray, person_pixels:
1356
 
1357
  except Exception as e:
1358
  logger.warning(f"Threshold calculation failed: {e}")
1359
- # Fallback: analyze in RGB space
1360
  return self._simple_rgb_threshold(green_pixels, person_pixels)
1361
 
1362
  def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
@@ -1369,8 +1530,8 @@ def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndar
1369
  green_distances = np.linalg.norm(person_pixels - avg_green, axis=1)
1370
  min_distance = np.min(green_distances)
1371
 
1372
- # Use 70% of minimum distance as threshold
1373
- threshold = min_distance * 0.7
1374
 
1375
  logger.info(f"RGB fallback: min_distance={min_distance:.3f}, threshold={threshold:.3f}")
1376
  return threshold
@@ -1466,8 +1627,7 @@ def get_processing_stats(self) -> Dict[str, Any]:
1466
 
1467
  def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
1468
  """
1469
-
1470
- Factory function to create TwoStageProcessor with compatibility layer.
1471
 
1472
  This provides a clean interface for integration with existing systems.
1473
  """
@@ -1483,6 +1643,7 @@ def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
1483
  config = ProcessingConfig()
1484
  logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames, green={config.GREEN_COLOR}")
1485
  logger.info(f"Auto-detection: {config.AUTO_DETECTION_FRAMES} analysis frames, fallback={config.AUTO_DETECTION_FALLBACK}")
 
1486
  logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
1487
  logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
1488
  logger.info(f"Memory testing: {'DISABLED' if not config.ENABLE_MEMORY_TESTING else 'ENABLED'}")
 
5
  Stage 1: SAM2 creates reference mask from first 3 frames
6
  Stage 2: MatAnyone processes entire video → pha.mp4
7
  Stage 3: Create green screen video using pha.mp4 as alpha matte
8
+ Stage 4: Adaptive chroma key compositing with background → final.mp4
9
+ FEATURES:
10
+ - Auto-detection of optimal chroma key threshold
11
+ - Adaptive iteration to find perfect threshold
12
+ - Chunked processing for long videos with memory management
13
+ - Dimension mismatch handling
14
+ - Memory optimization with light/deep cleanup modes
15
  """
16
 
17
  # ==============================================================================
 
79
  # Auto-detection settings
80
  AUTO_DETECTION_FRAMES = 5 # Number of frames to analyze for auto-detection
81
  AUTO_DETECTION_FALLBACK = 0.1 # Fallback threshold if auto-detection fails
82
+ MIN_THRESHOLD = 0.02 # Minimum allowed threshold
83
  MAX_THRESHOLD = 0.3 # Maximum allowed threshold
84
 
85
+ # Adaptive optimization settings
86
+ ADAPTIVE_MAX_ITERATIONS = 10 # Maximum iterations for threshold optimization
87
+ ADAPTIVE_GREEN_TOLERANCE = 0.01 # Acceptable green residue level
88
+ ADAPTIVE_TRANSPARENCY_TOLERANCE = 0.1 # Acceptable transparency quality
89
+
90
  # Quality settings
91
  VIDEO_CODEC = 'libx264'
92
  VIDEO_BITRATE = '8000k'
 
522
  1. SAM2 creates reference mask from first few frames
523
  2. MatAnyone processes entire video using reference mask → pha.mp4
524
  3. Create green screen video using pha.mp4 as alpha matte
525
+ 4. Adaptive chroma key compositing with background → final.mp4
526
+ 5. Auto-detects and iteratively optimizes chroma key threshold
527
  6. Chunked processing for long videos with memory management
528
+ 7. Dimension mismatch handling for robust processing
529
  """
530
 
531
  def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
 
717
  if self.memory_manager.should_clear_memory():
718
  self.memory_manager.cleanup_stage("GreenScreen")
719
 
720
+ # Stage 4: Adaptive final compositing
721
  if callback:
722
+ callback("Adaptive compositing with background...", 90)
723
+ logger.info("STAGE 4: Adaptive compositing with background...")
724
+ final_path = self._stage4_adaptive_compositing(green_screen_path, background_path, output_path, quality)
725
 
726
  # Final memory cleanup
727
  self.memory_manager.cleanup_stage("Final")
 
990
  raise
991
 
992
  # ==============================================================================
993
+ # CHAPTER 10: STAGE 3 - GREEN SCREEN VIDEO CREATION WITH DIMENSION FIX
994
  # ==============================================================================
995
 
996
  @MemoryTester.monitor_memory_during_processing
 
1104
  raise
1105
 
1106
  # ==============================================================================
1107
+ # CHAPTER 11: STAGE 4 - ADAPTIVE CHROMA KEY COMPOSITING
1108
  # ==============================================================================
1109
 
1110
+ @MemoryTester.monitor_memory_during_processing
1111
+ def _stage4_adaptive_compositing(self, green_screen_path: str, background_path: str,
1112
+ output_path: str, quality: str) -> str:
1113
  """
1114
+ Stage 4: Final compositing with adaptive threshold optimization.
1115
 
1116
+ Iteratively adjusts chroma key threshold until green is properly removed.
1117
  """
1118
  try:
1119
  # Get quality profile
1120
  profile = QualityManager.get_profile(quality)
1121
 
1122
+ # Load videos/images
1123
  green_clip = VideoFileClip(green_screen_path)
1124
 
 
1125
  if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
 
1126
  bg_clip = VideoFileClip(background_path)
 
1127
  if bg_clip.duration < green_clip.duration:
1128
  bg_clip = bg_clip.loop(duration=green_clip.duration)
 
1129
  bg_clip = bg_clip.subclip(0, green_clip.duration)
1130
  else:
 
1131
  bg_clip = ImageClip(background_path, duration=green_clip.duration)
1132
 
 
1133
  bg_clip = bg_clip.resize((green_clip.w, green_clip.h))
1134
 
1135
+ # Start with auto-detected threshold
1136
+ initial_threshold = self._auto_detect_chroma_threshold(green_screen_path)
1137
+ logger.info(f"Initial auto-detected threshold: {initial_threshold:.4f}")
1138
+
1139
+ # Adaptive optimization
1140
+ best_threshold = self._optimize_chroma_threshold(
1141
+ green_clip, bg_clip, initial_threshold,
1142
+ max_iterations=self.config.ADAPTIVE_MAX_ITERATIONS
1143
+ )
1144
+
1145
+ logger.info(f"✅ Optimized threshold: {best_threshold:.4f} (started from {initial_threshold:.4f})")
1146
 
1147
+ # Apply final chroma key with optimized threshold
1148
  green_screen_keyed = green_clip.fx(vfx.mask_color,
1149
  color=self.config.GREEN_COLOR_NORMALIZED,
1150
+ thr=best_threshold,
1151
+ s=0.1) # Reduced smoothing for sharper edges
1152
 
1153
+ # Composite and write
1154
  final_clip = CompositeVideoClip([bg_clip, green_screen_keyed])
1155
 
 
1156
  write_params = {
1157
  'codec': self.config.VIDEO_CODEC,
1158
  'bitrate': profile['bitrate'],
 
1161
  'logger': None
1162
  }
1163
 
 
1164
  if 'crf' in profile:
1165
  write_params['ffmpeg_params'] = ['-crf', str(profile['crf'])]
1166
 
 
1168
 
1169
  # Cleanup
1170
  green_clip.close()
1171
+ bg_clip.close()
1172
  final_clip.close()
1173
 
 
1174
  if not os.path.exists(output_path):
1175
  raise RuntimeError("Final output file was not created")
1176
+
1177
+ file_size = os.path.getsize(output_path) / (1024 * 1024)
1178
+ logger.info(f"✅ Adaptive compositing completed: {output_path} ({file_size:.1f} MB)")
1179
 
1180
  return output_path
1181
 
1182
  except Exception as e:
1183
+ logger.error(f"Stage 4 adaptive failed: {e}")
1184
  raise
1185
+
1186
def _optimize_chroma_threshold(self, green_clip, bg_clip, initial_threshold: float,
                               max_iterations: int = 10) -> float:
    """
    Iteratively optimize chroma key threshold by analyzing output quality.

    Starting from the auto-detected threshold, performs a bounded,
    binary-search-style refinement: each iteration keys the green-screen
    clip at a candidate threshold, composites it over the background,
    samples three frames, and scores them for remaining green residue and
    for over-removal of the subject. The lowest combined score wins.
    """
    current = initial_threshold
    best_threshold = current
    best_score = float('inf')

    # Search window around the initial guess, clamped to config limits.
    lo = max(self.config.MIN_THRESHOLD, initial_threshold * 0.5)
    hi = min(self.config.MAX_THRESHOLD, initial_threshold * 2.0)

    # Remember candidates already evaluated so we never re-test one.
    seen = set()

    for step in range(max_iterations):
        # Round to avoid churning on insignificant differences.
        current = round(current, 4)

        if current in seen:
            logger.info(f"  Threshold {current:.4f} already tested, adjusting...")
            current = (lo + hi) / 2
            continue

        seen.add(current)
        logger.info(f"🔄 Optimization iteration {step + 1}/{max_iterations}, testing threshold: {current:.4f}")

        # Key out green at the candidate threshold and composite a test clip.
        keyed = green_clip.fx(vfx.mask_color,
                              color=self.config.GREEN_COLOR_NORMALIZED,
                              thr=current,
                              s=0.1)
        test_composite = CompositeVideoClip([bg_clip, keyed])

        # Probe near the beginning, middle and end of the clip.
        sample_times = [green_clip.duration * frac for frac in (0.1, 0.5, 0.9)]

        green_sum = 0
        transparency_sum = 0
        for sample_time in sample_times:
            composite_frame = test_composite.get_frame(sample_time)
            background_frame = bg_clip.get_frame(sample_time)
            green_sum += self._analyze_green_residue(composite_frame)
            transparency_sum += self._analyze_transparency_quality(composite_frame, background_frame)

        avg_green_score = green_sum / len(sample_times)
        avg_transparency_score = transparency_sum / len(sample_times)

        # Combined score — lower is better; transparency weighted at 50%.
        total_score = avg_green_score + avg_transparency_score * 0.5

        logger.info(f"  📊 Green residue: {avg_green_score:.4f}, Transparency: {avg_transparency_score:.4f}, Total: {total_score:.4f}")

        if total_score < best_score:
            best_score = total_score
            best_threshold = current
            logger.info(f"  ✅ New best threshold: {best_threshold:.4f} (score: {best_score:.4f})")

        # Early exit once both quality tolerances are satisfied.
        if (avg_green_score < self.config.ADAPTIVE_GREEN_TOLERANCE
                and avg_transparency_score < self.config.ADAPTIVE_TRANSPARENCY_TOLERANCE):
            logger.info(f"  🎯 Acceptable quality reached! Stopping optimization.")
            break

        # Steer the next candidate: too green → lower; too hollow → raise.
        if avg_green_score > 0.05:
            logger.info(f"  🟢 Too much green, decreasing threshold")
            hi = current
            current = (lo + current) / 2
        elif avg_transparency_score > 0.3:
            logger.info(f"  👤 Subject too transparent, increasing threshold")
            lo = current
            current = (current + hi) / 2
        else:
            # Fine-tune around the current value.
            if avg_green_score > avg_transparency_score:
                current *= 0.95  # Slightly more aggressive
            else:
                current *= 1.05  # Slightly less aggressive

        # Keep the candidate inside the configured safe range.
        current = np.clip(current, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)

        # Release the per-iteration test clips.
        test_composite.close()
        keyed.close()

        # Stop once the candidate has settled on the best value.
        if abs(current - best_threshold) < 0.001 and step > 3:
            logger.info("  📍 Converged, stopping optimization")
            break

    return best_threshold
1295
+
1296
+ def _analyze_green_residue(self, frame: np.ndarray) -> float:
1297
+ """
1298
+ Analyze how much green remains in the frame.
1299
+ Returns score from 0 (no green) to 1 (lots of green).
1300
+ """
1301
+ # Convert to float
1302
+ img = frame.astype(np.float32) / 255.0
1303
+
1304
+ # Detect pure green pixels
1305
+ green_pixels = (
1306
+ (img[:,:,1] > 0.7) & # High green
1307
+ (img[:,:,0] < 0.3) & # Low red
1308
+ (img[:,:,2] < 0.3) # Low blue
1309
+ )
1310
+
1311
+ # Calculate percentage of green pixels
1312
+ green_ratio = np.sum(green_pixels) / (frame.shape[0] * frame.shape[1])
1313
+
1314
+ # Also check for greenish tint in other pixels
1315
+ greenish_pixels = (
1316
+ (img[:,:,1] > img[:,:,0] * 1.5) & # Green > Red * 1.5
1317
+ (img[:,:,1] > img[:,:,2] * 1.5) & # Green > Blue * 1.5
1318
+ (img[:,:,1] > 0.4) # Significant green
1319
+ )
1320
+
1321
+ greenish_ratio = np.sum(greenish_pixels) / (frame.shape[0] * frame.shape[1])
1322
+
1323
+ # Combined score
1324
+ score = green_ratio + greenish_ratio * 0.3
1325
+
1326
+ return min(1.0, score)
1327
+
1328
+ def _analyze_transparency_quality(self, composite_frame: np.ndarray, bg_frame: np.ndarray) -> float:
1329
+ """
1330
+ Analyze if too much of the subject was removed.
1331
+ Returns score from 0 (good) to 1 (too much removed).
1332
+ """
1333
+ # Calculate difference between composite and background
1334
+ diff = np.abs(composite_frame.astype(np.float32) - bg_frame.astype(np.float32))
1335
+
1336
+ # Sum of differences (more difference = more of subject preserved)
1337
+ total_diff = np.sum(diff) / (255.0 * 3 * composite_frame.shape[0] * composite_frame.shape[1])
1338
+
1339
+ # If difference is too small, too much was removed
1340
+ if total_diff < 0.05: # Less than 5% different from background
1341
+ return 1.0 # Bad - subject was removed
1342
+ elif total_diff > 0.3: # More than 30% different
1343
+ return 0.0 # Good - subject well preserved
1344
+ else:
1345
+ # Linear interpolation
1346
+ return 1.0 - (total_diff - 0.05) / 0.25
1347
 
1348
  # ==============================================================================
1349
  # CHAPTER 12: AUTO CHROMA KEY THRESHOLD DETECTION
 
1351
 
1352
  def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
1353
  """
1354
+ Auto-detect initial chroma key threshold by analyzing green screen video.
1355
 
1356
  This method:
1357
  1. Samples frames from the green screen video
1358
  2. Identifies green background pixels vs person pixels
1359
  3. Calculates color distance between greenest background and person
1360
+ 4. Sets threshold as percentage of that distance for initial guess
1361
  """
1362
  try:
1363
+ logger.info("Analyzing green screen video for initial threshold detection...")
1364
 
1365
  # Open green screen video
1366
  cap = cv2.VideoCapture(green_screen_path)
 
1370
 
1371
  # Get video properties
1372
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
1373
 
1374
  # Sample frames evenly across the video
1375
  frame_indices = np.linspace(0, total_frames - 1,
 
1407
 
1408
  logger.info(f"Analyzed {len(green_pixels)} green pixels and {len(person_pixels)} person pixels")
1409
 
1410
+ # Calculate initial threshold
1411
+ threshold = self._calculate_initial_threshold(green_pixels, person_pixels)
1412
 
1413
  # Clamp to safe range
1414
  threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
1415
 
1416
+ logger.info(f"Initial threshold calculated: {threshold:.4f}")
1417
  return threshold
1418
 
1419
  except Exception as e:
 
1445
  )
1446
 
1447
  # Sample pixels (subsample for performance)
 
 
1448
  # Sample green pixels
1449
  green_coords = np.where(green_mask)
1450
  if len(green_coords[0]) > 1000:
 
1451
  indices = np.random.choice(len(green_coords[0]), 1000, replace=False)
1452
  green_coords = (green_coords[0][indices], green_coords[1][indices])
1453
 
 
1456
  # Sample person pixels
1457
  person_coords = np.where(person_mask)
1458
  if len(person_coords[0]) > 1000:
 
1459
  indices = np.random.choice(len(person_coords[0]), 1000, replace=False)
1460
  person_coords = (person_coords[0][indices], person_coords[1][indices])
1461
 
 
1467
  logger.warning(f"Frame color analysis failed: {e}")
1468
  return [], []
1469
 
1470
+ def _calculate_initial_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
1471
  """
1472
+ Calculate initial threshold based on color analysis.
1473
 
1474
  Strategy:
1475
  1. Find the "least green" green pixels (edge of green screen)
 
1484
  person_lab = self._rgb_to_lab_batch(person_pixels)
1485
 
1486
  # Use clustering to find representative colors
 
1487
  kmeans_green = KMeans(n_clusters=min(5, len(green_lab)//10), random_state=42, n_init=10)
1488
  green_clusters = kmeans_green.fit_predict(green_lab)
1489
  green_centers = kmeans_green.cluster_centers_
 
1506
  color_distance = np.linalg.norm(main_green_cluster - closest_person_cluster)
1507
 
1508
  # Convert LAB distance to threshold
1509
+ # Start conservative - use 40% of distance for initial guess
1510
+ threshold = (color_distance / 100.0) * 0.4
 
1511
 
1512
+ logger.info(f"Color distance analysis: LAB distance={color_distance:.2f}, initial threshold={threshold:.3f}")
1513
 
1514
  return threshold
1515
  else:
 
1518
 
1519
  except Exception as e:
1520
  logger.warning(f"Threshold calculation failed: {e}")
 
1521
  return self._simple_rgb_threshold(green_pixels, person_pixels)
1522
 
1523
  def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
 
1530
  green_distances = np.linalg.norm(person_pixels - avg_green, axis=1)
1531
  min_distance = np.min(green_distances)
1532
 
1533
+ # Use 50% of minimum distance as initial threshold
1534
+ threshold = min_distance * 0.5
1535
 
1536
  logger.info(f"RGB fallback: min_distance={min_distance:.3f}, threshold={threshold:.3f}")
1537
  return threshold
 
1627
 
1628
  def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
1629
  """
1630
+ Factory function to create TwoStageProcessor with compatibility layer.
 
1631
 
1632
  This provides a clean interface for integration with existing systems.
1633
  """
 
1643
  config = ProcessingConfig()
1644
  logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames, green={config.GREEN_COLOR}")
1645
  logger.info(f"Auto-detection: {config.AUTO_DETECTION_FRAMES} analysis frames, fallback={config.AUTO_DETECTION_FALLBACK}")
1646
+ logger.info(f"Adaptive optimization: {config.ADAPTIVE_MAX_ITERATIONS} max iterations")
1647
  logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
1648
  logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
1649
  logger.info(f"Memory testing: {'DISABLED' if not config.ENABLE_MEMORY_TESTING else 'ENABLED'}")