Update processing/two_stage/two_stage_processor.py
Browse files
processing/two_stage/two_stage_processor.py
CHANGED
|
@@ -5,10 +5,13 @@
|
|
| 5 |
Stage 1: SAM2 creates reference mask from first 3 frames
|
| 6 |
Stage 2: MatAnyone processes entire video → pha.mp4
|
| 7 |
Stage 3: Create green screen video using pha.mp4 as alpha matte
|
| 8 |
-
Stage 4:
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
# ==============================================================================
|
|
@@ -76,9 +79,14 @@ class ProcessingConfig:
|
|
| 76 |
# Auto-detection settings
|
| 77 |
AUTO_DETECTION_FRAMES = 5 # Number of frames to analyze for auto-detection
|
| 78 |
AUTO_DETECTION_FALLBACK = 0.1 # Fallback threshold if auto-detection fails
|
| 79 |
-
MIN_THRESHOLD = 0.
|
| 80 |
MAX_THRESHOLD = 0.3 # Maximum allowed threshold
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# Quality settings
|
| 83 |
VIDEO_CODEC = 'libx264'
|
| 84 |
VIDEO_BITRATE = '8000k'
|
|
@@ -514,10 +522,10 @@ class TwoStageProcessor:
|
|
| 514 |
1. SAM2 creates reference mask from first few frames
|
| 515 |
2. MatAnyone processes entire video using reference mask → pha.mp4
|
| 516 |
3. Create green screen video using pha.mp4 as alpha matte
|
| 517 |
-
4.
|
| 518 |
-
5. Auto-detects
|
| 519 |
6. Chunked processing for long videos with memory management
|
| 520 |
-
7.
|
| 521 |
"""
|
| 522 |
|
| 523 |
def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
|
|
@@ -709,11 +717,11 @@ def _process_single_video(self,
|
|
| 709 |
if self.memory_manager.should_clear_memory():
|
| 710 |
self.memory_manager.cleanup_stage("GreenScreen")
|
| 711 |
|
| 712 |
-
# Stage 4:
|
| 713 |
if callback:
|
| 714 |
-
callback("
|
| 715 |
-
logger.info("STAGE 4:
|
| 716 |
-
final_path = self.
|
| 717 |
|
| 718 |
# Final memory cleanup
|
| 719 |
self.memory_manager.cleanup_stage("Final")
|
|
@@ -982,7 +990,7 @@ def _trim_video(self, input_path: str, output_path: str, seconds: int):
|
|
| 982 |
raise
|
| 983 |
|
| 984 |
# ==============================================================================
|
| 985 |
-
# CHAPTER 10: STAGE 3 - GREEN SCREEN VIDEO CREATION
|
| 986 |
# ==============================================================================
|
| 987 |
|
| 988 |
@MemoryTester.monitor_memory_during_processing
|
|
@@ -1096,54 +1104,55 @@ def _stage3_create_green_screen_video(self, original_video_path: str, alpha_vide
|
|
| 1096 |
raise
|
| 1097 |
|
| 1098 |
# ==============================================================================
|
| 1099 |
-
# CHAPTER 11: STAGE 4 -
|
| 1100 |
# ==============================================================================
|
| 1101 |
|
| 1102 |
-
@MemoryTester.monitor_memory_during_processing
|
| 1103 |
-
def
|
|
|
|
| 1104 |
"""
|
| 1105 |
-
Stage 4: Final compositing
|
| 1106 |
|
| 1107 |
-
|
| 1108 |
"""
|
| 1109 |
try:
|
| 1110 |
# Get quality profile
|
| 1111 |
profile = QualityManager.get_profile(quality)
|
| 1112 |
|
| 1113 |
-
# Load
|
| 1114 |
green_clip = VideoFileClip(green_screen_path)
|
| 1115 |
|
| 1116 |
-
# Load and prepare background
|
| 1117 |
if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
|
| 1118 |
-
# Video background
|
| 1119 |
bg_clip = VideoFileClip(background_path)
|
| 1120 |
-
# Loop background if shorter than green screen video
|
| 1121 |
if bg_clip.duration < green_clip.duration:
|
| 1122 |
bg_clip = bg_clip.loop(duration=green_clip.duration)
|
| 1123 |
-
# Trim background if longer
|
| 1124 |
bg_clip = bg_clip.subclip(0, green_clip.duration)
|
| 1125 |
else:
|
| 1126 |
-
# Image background
|
| 1127 |
bg_clip = ImageClip(background_path, duration=green_clip.duration)
|
| 1128 |
|
| 1129 |
-
# Resize background to match green screen video
|
| 1130 |
bg_clip = bg_clip.resize((green_clip.w, green_clip.h))
|
| 1131 |
|
| 1132 |
-
#
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1136 |
|
| 1137 |
-
# Apply chroma key with
|
| 1138 |
green_screen_keyed = green_clip.fx(vfx.mask_color,
|
| 1139 |
color=self.config.GREEN_COLOR_NORMALIZED,
|
| 1140 |
-
thr=
|
| 1141 |
-
s=0.
|
| 1142 |
|
| 1143 |
-
# Composite
|
| 1144 |
final_clip = CompositeVideoClip([bg_clip, green_screen_keyed])
|
| 1145 |
|
| 1146 |
-
# Write final video
|
| 1147 |
write_params = {
|
| 1148 |
'codec': self.config.VIDEO_CODEC,
|
| 1149 |
'bitrate': profile['bitrate'],
|
|
@@ -1152,7 +1161,6 @@ def _stage4_final_compositing(self, green_screen_path: str, background_path: str
|
|
| 1152 |
'logger': None
|
| 1153 |
}
|
| 1154 |
|
| 1155 |
-
# Add CRF if specified
|
| 1156 |
if 'crf' in profile:
|
| 1157 |
write_params['ffmpeg_params'] = ['-crf', str(profile['crf'])]
|
| 1158 |
|
|
@@ -1160,21 +1168,182 @@ def _stage4_final_compositing(self, green_screen_path: str, background_path: str
|
|
| 1160 |
|
| 1161 |
# Cleanup
|
| 1162 |
green_clip.close()
|
| 1163 |
-
bg_clip.close()
|
| 1164 |
final_clip.close()
|
| 1165 |
|
| 1166 |
-
# Verify output
|
| 1167 |
if not os.path.exists(output_path):
|
| 1168 |
raise RuntimeError("Final output file was not created")
|
| 1169 |
-
|
| 1170 |
-
file_size = os.path.getsize(output_path) / (1024 * 1024)
|
| 1171 |
-
logger.info(f"✅
|
| 1172 |
|
| 1173 |
return output_path
|
| 1174 |
|
| 1175 |
except Exception as e:
|
| 1176 |
-
logger.error(f"Stage 4 failed: {e}")
|
| 1177 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1178 |
|
| 1179 |
# ==============================================================================
|
| 1180 |
# CHAPTER 12: AUTO CHROMA KEY THRESHOLD DETECTION
|
|
@@ -1182,16 +1351,16 @@ def _stage4_final_compositing(self, green_screen_path: str, background_path: str
|
|
| 1182 |
|
| 1183 |
def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
|
| 1184 |
"""
|
| 1185 |
-
Auto-detect
|
| 1186 |
|
| 1187 |
This method:
|
| 1188 |
1. Samples frames from the green screen video
|
| 1189 |
2. Identifies green background pixels vs person pixels
|
| 1190 |
3. Calculates color distance between greenest background and person
|
| 1191 |
-
4. Sets threshold as
|
| 1192 |
"""
|
| 1193 |
try:
|
| 1194 |
-
logger.info("Analyzing green screen video for
|
| 1195 |
|
| 1196 |
# Open green screen video
|
| 1197 |
cap = cv2.VideoCapture(green_screen_path)
|
|
@@ -1201,7 +1370,6 @@ def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
|
|
| 1201 |
|
| 1202 |
# Get video properties
|
| 1203 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 1204 |
-
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 1205 |
|
| 1206 |
# Sample frames evenly across the video
|
| 1207 |
frame_indices = np.linspace(0, total_frames - 1,
|
|
@@ -1239,13 +1407,13 @@ def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
|
|
| 1239 |
|
| 1240 |
logger.info(f"Analyzed {len(green_pixels)} green pixels and {len(person_pixels)} person pixels")
|
| 1241 |
|
| 1242 |
-
# Calculate
|
| 1243 |
-
threshold = self.
|
| 1244 |
|
| 1245 |
# Clamp to safe range
|
| 1246 |
threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
|
| 1247 |
|
| 1248 |
-
logger.info(f"
|
| 1249 |
return threshold
|
| 1250 |
|
| 1251 |
except Exception as e:
|
|
@@ -1277,12 +1445,9 @@ def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
|
|
| 1277 |
)
|
| 1278 |
|
| 1279 |
# Sample pixels (subsample for performance)
|
| 1280 |
-
h, w = frame_rgb.shape[:2]
|
| 1281 |
-
|
| 1282 |
# Sample green pixels
|
| 1283 |
green_coords = np.where(green_mask)
|
| 1284 |
if len(green_coords[0]) > 1000:
|
| 1285 |
-
# Subsample if too many
|
| 1286 |
indices = np.random.choice(len(green_coords[0]), 1000, replace=False)
|
| 1287 |
green_coords = (green_coords[0][indices], green_coords[1][indices])
|
| 1288 |
|
|
@@ -1291,7 +1456,6 @@ def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
|
|
| 1291 |
# Sample person pixels
|
| 1292 |
person_coords = np.where(person_mask)
|
| 1293 |
if len(person_coords[0]) > 1000:
|
| 1294 |
-
# Subsample if too many
|
| 1295 |
indices = np.random.choice(len(person_coords[0]), 1000, replace=False)
|
| 1296 |
person_coords = (person_coords[0][indices], person_coords[1][indices])
|
| 1297 |
|
|
@@ -1303,9 +1467,9 @@ def _analyze_frame_colors(self, frame_rgb: np.ndarray) -> Tuple[list, list]:
|
|
| 1303 |
logger.warning(f"Frame color analysis failed: {e}")
|
| 1304 |
return [], []
|
| 1305 |
|
| 1306 |
-
def
|
| 1307 |
"""
|
| 1308 |
-
Calculate
|
| 1309 |
|
| 1310 |
Strategy:
|
| 1311 |
1. Find the "least green" green pixels (edge of green screen)
|
|
@@ -1320,7 +1484,6 @@ def _calculate_optimal_threshold(self, green_pixels: np.ndarray, person_pixels:
|
|
| 1320 |
person_lab = self._rgb_to_lab_batch(person_pixels)
|
| 1321 |
|
| 1322 |
# Use clustering to find representative colors
|
| 1323 |
-
# Find the least green of the green pixels
|
| 1324 |
kmeans_green = KMeans(n_clusters=min(5, len(green_lab)//10), random_state=42, n_init=10)
|
| 1325 |
green_clusters = kmeans_green.fit_predict(green_lab)
|
| 1326 |
green_centers = kmeans_green.cluster_centers_
|
|
@@ -1343,11 +1506,10 @@ def _calculate_optimal_threshold(self, green_pixels: np.ndarray, person_pixels:
|
|
| 1343 |
color_distance = np.linalg.norm(main_green_cluster - closest_person_cluster)
|
| 1344 |
|
| 1345 |
# Convert LAB distance to threshold
|
| 1346 |
-
#
|
| 1347 |
-
|
| 1348 |
-
threshold = (color_distance / 100.0) * 0.6
|
| 1349 |
|
| 1350 |
-
logger.info(f"Color distance analysis: LAB distance={color_distance:.2f}, threshold={threshold:.3f}")
|
| 1351 |
|
| 1352 |
return threshold
|
| 1353 |
else:
|
|
@@ -1356,7 +1518,6 @@ def _calculate_optimal_threshold(self, green_pixels: np.ndarray, person_pixels:
|
|
| 1356 |
|
| 1357 |
except Exception as e:
|
| 1358 |
logger.warning(f"Threshold calculation failed: {e}")
|
| 1359 |
-
# Fallback: analyze in RGB space
|
| 1360 |
return self._simple_rgb_threshold(green_pixels, person_pixels)
|
| 1361 |
|
| 1362 |
def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
|
|
@@ -1369,8 +1530,8 @@ def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndar
|
|
| 1369 |
green_distances = np.linalg.norm(person_pixels - avg_green, axis=1)
|
| 1370 |
min_distance = np.min(green_distances)
|
| 1371 |
|
| 1372 |
-
# Use
|
| 1373 |
-
threshold = min_distance * 0.
|
| 1374 |
|
| 1375 |
logger.info(f"RGB fallback: min_distance={min_distance:.3f}, threshold={threshold:.3f}")
|
| 1376 |
return threshold
|
|
@@ -1466,8 +1627,7 @@ def get_processing_stats(self) -> Dict[str, Any]:
|
|
| 1466 |
|
| 1467 |
def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
|
| 1468 |
"""
|
| 1469 |
-
|
| 1470 |
-
Factory function to create TwoStageProcessor with compatibility layer.
|
| 1471 |
|
| 1472 |
This provides a clean interface for integration with existing systems.
|
| 1473 |
"""
|
|
@@ -1483,6 +1643,7 @@ def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
|
|
| 1483 |
config = ProcessingConfig()
|
| 1484 |
logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames, green={config.GREEN_COLOR}")
|
| 1485 |
logger.info(f"Auto-detection: {config.AUTO_DETECTION_FRAMES} analysis frames, fallback={config.AUTO_DETECTION_FALLBACK}")
|
|
|
|
| 1486 |
logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
|
| 1487 |
logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
|
| 1488 |
logger.info(f"Memory testing: {'DISABLED' if not config.ENABLE_MEMORY_TESTING else 'ENABLED'}")
|
|
|
|
| 5 |
Stage 1: SAM2 creates reference mask from first 3 frames
|
| 6 |
Stage 2: MatAnyone processes entire video → pha.mp4
|
| 7 |
Stage 3: Create green screen video using pha.mp4 as alpha matte
|
| 8 |
+
Stage 4: Adaptive chroma key compositing with background → final.mp4
|
| 9 |
+
FEATURES:
|
| 10 |
+
- Auto-detection of optimal chroma key threshold
|
| 11 |
+
- Adaptive iteration to find perfect threshold
|
| 12 |
+
- Chunked processing for long videos with memory management
|
| 13 |
+
- Dimension mismatch handling
|
| 14 |
+
- Memory optimization with light/deep cleanup modes
|
| 15 |
"""
|
| 16 |
|
| 17 |
# ==============================================================================
|
|
|
|
| 79 |
# Auto-detection settings
|
| 80 |
AUTO_DETECTION_FRAMES = 5 # Number of frames to analyze for auto-detection
|
| 81 |
AUTO_DETECTION_FALLBACK = 0.1 # Fallback threshold if auto-detection fails
|
| 82 |
+
MIN_THRESHOLD = 0.02 # Minimum allowed threshold
|
| 83 |
MAX_THRESHOLD = 0.3 # Maximum allowed threshold
|
| 84 |
|
| 85 |
+
# Adaptive optimization settings
|
| 86 |
+
ADAPTIVE_MAX_ITERATIONS = 10 # Maximum iterations for threshold optimization
|
| 87 |
+
ADAPTIVE_GREEN_TOLERANCE = 0.01 # Acceptable green residue level
|
| 88 |
+
ADAPTIVE_TRANSPARENCY_TOLERANCE = 0.1 # Acceptable transparency quality
|
| 89 |
+
|
| 90 |
# Quality settings
|
| 91 |
VIDEO_CODEC = 'libx264'
|
| 92 |
VIDEO_BITRATE = '8000k'
|
|
|
|
| 522 |
1. SAM2 creates reference mask from first few frames
|
| 523 |
2. MatAnyone processes entire video using reference mask → pha.mp4
|
| 524 |
3. Create green screen video using pha.mp4 as alpha matte
|
| 525 |
+
4. Adaptive chroma key compositing with background → final.mp4
|
| 526 |
+
5. Auto-detects and iteratively optimizes chroma key threshold
|
| 527 |
6. Chunked processing for long videos with memory management
|
| 528 |
+
7. Dimension mismatch handling for robust processing
|
| 529 |
"""
|
| 530 |
|
| 531 |
def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
|
|
|
|
| 717 |
if self.memory_manager.should_clear_memory():
|
| 718 |
self.memory_manager.cleanup_stage("GreenScreen")
|
| 719 |
|
| 720 |
+
# Stage 4: Adaptive final compositing
|
| 721 |
if callback:
|
| 722 |
+
callback("Adaptive compositing with background...", 90)
|
| 723 |
+
logger.info("STAGE 4: Adaptive compositing with background...")
|
| 724 |
+
final_path = self._stage4_adaptive_compositing(green_screen_path, background_path, output_path, quality)
|
| 725 |
|
| 726 |
# Final memory cleanup
|
| 727 |
self.memory_manager.cleanup_stage("Final")
|
|
|
|
| 990 |
raise
|
| 991 |
|
| 992 |
# ==============================================================================
|
| 993 |
+
# CHAPTER 10: STAGE 3 - GREEN SCREEN VIDEO CREATION WITH DIMENSION FIX
|
| 994 |
# ==============================================================================
|
| 995 |
|
| 996 |
@MemoryTester.monitor_memory_during_processing
|
|
|
|
| 1104 |
raise
|
| 1105 |
|
| 1106 |
# ==============================================================================
|
| 1107 |
+
# CHAPTER 11: STAGE 4 - ADAPTIVE CHROMA KEY COMPOSITING
|
| 1108 |
# ==============================================================================
|
| 1109 |
|
| 1110 |
+
@MemoryTester.monitor_memory_during_processing
|
| 1111 |
+
def _stage4_adaptive_compositing(self, green_screen_path: str, background_path: str,
|
| 1112 |
+
output_path: str, quality: str) -> str:
|
| 1113 |
"""
|
| 1114 |
+
Stage 4: Final compositing with adaptive threshold optimization.
|
| 1115 |
|
| 1116 |
+
Iteratively adjusts chroma key threshold until green is properly removed.
|
| 1117 |
"""
|
| 1118 |
try:
|
| 1119 |
# Get quality profile
|
| 1120 |
profile = QualityManager.get_profile(quality)
|
| 1121 |
|
| 1122 |
+
# Load videos/images
|
| 1123 |
green_clip = VideoFileClip(green_screen_path)
|
| 1124 |
|
|
|
|
| 1125 |
if background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
|
|
|
|
| 1126 |
bg_clip = VideoFileClip(background_path)
|
|
|
|
| 1127 |
if bg_clip.duration < green_clip.duration:
|
| 1128 |
bg_clip = bg_clip.loop(duration=green_clip.duration)
|
|
|
|
| 1129 |
bg_clip = bg_clip.subclip(0, green_clip.duration)
|
| 1130 |
else:
|
|
|
|
| 1131 |
bg_clip = ImageClip(background_path, duration=green_clip.duration)
|
| 1132 |
|
|
|
|
| 1133 |
bg_clip = bg_clip.resize((green_clip.w, green_clip.h))
|
| 1134 |
|
| 1135 |
+
# Start with auto-detected threshold
|
| 1136 |
+
initial_threshold = self._auto_detect_chroma_threshold(green_screen_path)
|
| 1137 |
+
logger.info(f"Initial auto-detected threshold: {initial_threshold:.4f}")
|
| 1138 |
+
|
| 1139 |
+
# Adaptive optimization
|
| 1140 |
+
best_threshold = self._optimize_chroma_threshold(
|
| 1141 |
+
green_clip, bg_clip, initial_threshold,
|
| 1142 |
+
max_iterations=self.config.ADAPTIVE_MAX_ITERATIONS
|
| 1143 |
+
)
|
| 1144 |
+
|
| 1145 |
+
logger.info(f"✅ Optimized threshold: {best_threshold:.4f} (started from {initial_threshold:.4f})")
|
| 1146 |
|
| 1147 |
+
# Apply final chroma key with optimized threshold
|
| 1148 |
green_screen_keyed = green_clip.fx(vfx.mask_color,
|
| 1149 |
color=self.config.GREEN_COLOR_NORMALIZED,
|
| 1150 |
+
thr=best_threshold,
|
| 1151 |
+
s=0.1) # Reduced smoothing for sharper edges
|
| 1152 |
|
| 1153 |
+
# Composite and write
|
| 1154 |
final_clip = CompositeVideoClip([bg_clip, green_screen_keyed])
|
| 1155 |
|
|
|
|
| 1156 |
write_params = {
|
| 1157 |
'codec': self.config.VIDEO_CODEC,
|
| 1158 |
'bitrate': profile['bitrate'],
|
|
|
|
| 1161 |
'logger': None
|
| 1162 |
}
|
| 1163 |
|
|
|
|
| 1164 |
if 'crf' in profile:
|
| 1165 |
write_params['ffmpeg_params'] = ['-crf', str(profile['crf'])]
|
| 1166 |
|
|
|
|
| 1168 |
|
| 1169 |
# Cleanup
|
| 1170 |
green_clip.close()
|
| 1171 |
+
bg_clip.close()
|
| 1172 |
final_clip.close()
|
| 1173 |
|
|
|
|
| 1174 |
if not os.path.exists(output_path):
|
| 1175 |
raise RuntimeError("Final output file was not created")
|
| 1176 |
+
|
| 1177 |
+
file_size = os.path.getsize(output_path) / (1024 * 1024)
|
| 1178 |
+
logger.info(f"✅ Adaptive compositing completed: {output_path} ({file_size:.1f} MB)")
|
| 1179 |
|
| 1180 |
return output_path
|
| 1181 |
|
| 1182 |
except Exception as e:
|
| 1183 |
+
logger.error(f"Stage 4 adaptive failed: {e}")
|
| 1184 |
raise
|
| 1185 |
+
|
| 1186 |
+
def _optimize_chroma_threshold(self, green_clip, bg_clip, initial_threshold: float,
                               max_iterations: int = 10) -> float:
    """
    Iteratively optimize chroma key threshold by analyzing output quality.

    Each candidate threshold is applied with ``vfx.mask_color``, composited over
    ``bg_clip`` and scored on three sample frames (10%, 50% and 90% of the green
    clip's duration). The candidate with the lowest combined score
    (green residue + 0.5 * transparency penalty) is returned.

    Args:
        green_clip: Green screen clip to key (must expose ``fx`` and ``duration``).
        bg_clip: Background clip the keyed clip is composited over.
        initial_threshold: Starting ``thr`` value for ``mask_color``.
        max_iterations: Upper bound on the number of loop iterations.

    Returns:
        The best-scoring threshold found, or ``initial_threshold`` unchanged if
        no candidate could be scored.
    """
    threshold = initial_threshold
    best_threshold = threshold
    best_score = float('inf')

    # Search bounds: half to double the initial guess, inside the configured range
    low = max(self.config.MIN_THRESHOLD, initial_threshold * 0.5)
    high = min(self.config.MAX_THRESHOLD, initial_threshold * 2.0)

    # Track tested thresholds to avoid repetition
    tested_thresholds = set()

    for iteration in range(max_iterations):
        # Round threshold to avoid tiny differences
        threshold = round(float(threshold), 4)

        if threshold in tested_thresholds:
            logger.info(f" Threshold {threshold:.4f} already tested, adjusting...")
            midpoint = (low + high) / 2
            if round(midpoint, 4) in tested_thresholds:
                # Every remaining iteration would retry this same midpoint,
                # so there is nothing new left to evaluate.
                break
            threshold = midpoint
            continue

        tested_thresholds.add(threshold)
        logger.info(f"🔄 Optimization iteration {iteration + 1}/{max_iterations}, testing threshold: {threshold:.4f}")

        avg_green_score, avg_transparency_score = self._score_chroma_threshold(
            green_clip, bg_clip, threshold
        )

        # Combined score (lower is better)
        total_score = avg_green_score + avg_transparency_score * 0.5

        logger.info(f" 📊 Green residue: {avg_green_score:.4f}, Transparency: {avg_transparency_score:.4f}, Total: {total_score:.4f}")

        if total_score < best_score:
            best_score = total_score
            best_threshold = threshold
            logger.info(f" ✅ New best threshold: {best_threshold:.4f} (score: {best_score:.4f})")

        # Check if we're good enough
        if (avg_green_score < self.config.ADAPTIVE_GREEN_TOLERANCE
                and avg_transparency_score < self.config.ADAPTIVE_TRANSPARENCY_TOLERANCE):
            logger.info(" 🎯 Acceptable quality reached! Stopping optimization.")
            break

        # Direction of the search: mask_color gives each pixel the opacity
        # d**s / (thr**s + d**s), where d is its distance from the key colour.
        # A LARGER thr therefore makes more near-green pixels transparent,
        # and a SMALLER thr keeps more of the image opaque.
        if avg_green_score > 0.05:  # Too much green remains -> key out more
            logger.info(" 🟢 Too much green, increasing threshold")
            low = threshold
            threshold = (threshold + high) / 2
        elif avg_transparency_score > 0.3:  # Too much was removed -> key out less
            logger.info(" 👤 Subject too transparent, decreasing threshold")
            high = threshold
            threshold = (low + threshold) / 2
        else:
            # Fine-tune around current value
            if avg_green_score > avg_transparency_score:
                threshold *= 1.05  # Slightly more aggressive keying
            else:
                threshold *= 0.95  # Slightly less aggressive keying

        # Ensure we stay in bounds
        threshold = float(np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD))

        # Stop if converged
        if abs(threshold - best_threshold) < 0.001 and iteration > 3:
            logger.info(" 📍 Converged, stopping optimization")
            break

    return best_threshold

def _score_chroma_threshold(self, green_clip, bg_clip, threshold: float) -> Tuple[float, float]:
    """
    Score one candidate threshold; returns (avg green residue, avg transparency penalty).

    The temporary keyed clip and composite are closed in a ``finally`` block so
    they are released on every exit path, including exceptions.
    """
    keyed = green_clip.fx(vfx.mask_color,
                          color=self.config.GREEN_COLOR_NORMALIZED,
                          thr=threshold,
                          s=0.1)
    test_composite = None
    try:
        test_composite = CompositeVideoClip([bg_clip, keyed])

        # Sample near the beginning, middle and end of the clip
        sample_times = [green_clip.duration * fraction for fraction in (0.1, 0.5, 0.9)]

        total_green_score = 0.0
        total_transparency_score = 0.0
        for sample_time in sample_times:
            test_frame = test_composite.get_frame(sample_time)
            bg_frame = bg_clip.get_frame(sample_time)
            total_green_score += self._analyze_green_residue(test_frame)
            total_transparency_score += self._analyze_transparency_quality(test_frame, bg_frame)

        return (total_green_score / len(sample_times),
                total_transparency_score / len(sample_times))
    finally:
        if test_composite is not None:
            test_composite.close()
        keyed.close()
|
| 1295 |
+
|
| 1296 |
+
def _analyze_green_residue(self, frame: np.ndarray) -> float:
|
| 1297 |
+
"""
|
| 1298 |
+
Analyze how much green remains in the frame.
|
| 1299 |
+
Returns score from 0 (no green) to 1 (lots of green).
|
| 1300 |
+
"""
|
| 1301 |
+
# Convert to float
|
| 1302 |
+
img = frame.astype(np.float32) / 255.0
|
| 1303 |
+
|
| 1304 |
+
# Detect pure green pixels
|
| 1305 |
+
green_pixels = (
|
| 1306 |
+
(img[:,:,1] > 0.7) & # High green
|
| 1307 |
+
(img[:,:,0] < 0.3) & # Low red
|
| 1308 |
+
(img[:,:,2] < 0.3) # Low blue
|
| 1309 |
+
)
|
| 1310 |
+
|
| 1311 |
+
# Calculate percentage of green pixels
|
| 1312 |
+
green_ratio = np.sum(green_pixels) / (frame.shape[0] * frame.shape[1])
|
| 1313 |
+
|
| 1314 |
+
# Also check for greenish tint in other pixels
|
| 1315 |
+
greenish_pixels = (
|
| 1316 |
+
(img[:,:,1] > img[:,:,0] * 1.5) & # Green > Red * 1.5
|
| 1317 |
+
(img[:,:,1] > img[:,:,2] * 1.5) & # Green > Blue * 1.5
|
| 1318 |
+
(img[:,:,1] > 0.4) # Significant green
|
| 1319 |
+
)
|
| 1320 |
+
|
| 1321 |
+
greenish_ratio = np.sum(greenish_pixels) / (frame.shape[0] * frame.shape[1])
|
| 1322 |
+
|
| 1323 |
+
# Combined score
|
| 1324 |
+
score = green_ratio + greenish_ratio * 0.3
|
| 1325 |
+
|
| 1326 |
+
return min(1.0, score)
|
| 1327 |
+
|
| 1328 |
+
def _analyze_transparency_quality(self, composite_frame: np.ndarray, bg_frame: np.ndarray) -> float:
|
| 1329 |
+
"""
|
| 1330 |
+
Analyze if too much of the subject was removed.
|
| 1331 |
+
Returns score from 0 (good) to 1 (too much removed).
|
| 1332 |
+
"""
|
| 1333 |
+
# Calculate difference between composite and background
|
| 1334 |
+
diff = np.abs(composite_frame.astype(np.float32) - bg_frame.astype(np.float32))
|
| 1335 |
+
|
| 1336 |
+
# Sum of differences (more difference = more of subject preserved)
|
| 1337 |
+
total_diff = np.sum(diff) / (255.0 * 3 * composite_frame.shape[0] * composite_frame.shape[1])
|
| 1338 |
+
|
| 1339 |
+
# If difference is too small, too much was removed
|
| 1340 |
+
if total_diff < 0.05: # Less than 5% different from background
|
| 1341 |
+
return 1.0 # Bad - subject was removed
|
| 1342 |
+
elif total_diff > 0.3: # More than 30% different
|
| 1343 |
+
return 0.0 # Good - subject well preserved
|
| 1344 |
+
else:
|
| 1345 |
+
# Linear interpolation
|
| 1346 |
+
return 1.0 - (total_diff - 0.05) / 0.25
|
| 1347 |
|
| 1348 |
# ==============================================================================
|
| 1349 |
# CHAPTER 12: AUTO CHROMA KEY THRESHOLD DETECTION
|
|
|
|
| 1351 |
|
| 1352 |
def _auto_detect_chroma_threshold(self, green_screen_path: str) -> float:
|
| 1353 |
"""
|
| 1354 |
+
Auto-detect initial chroma key threshold by analyzing green screen video.
|
| 1355 |
|
| 1356 |
This method:
|
| 1357 |
1. Samples frames from the green screen video
|
| 1358 |
2. Identifies green background pixels vs person pixels
|
| 1359 |
3. Calculates color distance between greenest background and person
|
| 1360 |
+
4. Sets threshold as percentage of that distance for initial guess
|
| 1361 |
"""
|
| 1362 |
try:
|
| 1363 |
+
logger.info("Analyzing green screen video for initial threshold detection...")
|
| 1364 |
|
| 1365 |
# Open green screen video
|
| 1366 |
cap = cv2.VideoCapture(green_screen_path)
|
|
|
|
| 1370 |
|
| 1371 |
# Get video properties
|
| 1372 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
|
|
| 1373 |
|
| 1374 |
# Sample frames evenly across the video
|
| 1375 |
frame_indices = np.linspace(0, total_frames - 1,
|
|
|
|
| 1407 |
|
| 1408 |
logger.info(f"Analyzed {len(green_pixels)} green pixels and {len(person_pixels)} person pixels")
|
| 1409 |
|
| 1410 |
+
# Calculate initial threshold
|
| 1411 |
+
threshold = self._calculate_initial_threshold(green_pixels, person_pixels)
|
| 1412 |
|
| 1413 |
# Clamp to safe range
|
| 1414 |
threshold = np.clip(threshold, self.config.MIN_THRESHOLD, self.config.MAX_THRESHOLD)
|
| 1415 |
|
| 1416 |
+
logger.info(f"Initial threshold calculated: {threshold:.4f}")
|
| 1417 |
return threshold
|
| 1418 |
|
| 1419 |
except Exception as e:
|
|
|
|
| 1445 |
)
|
| 1446 |
|
| 1447 |
# Sample pixels (subsample for performance)
|
|
|
|
|
|
|
| 1448 |
# Sample green pixels
|
| 1449 |
green_coords = np.where(green_mask)
|
| 1450 |
if len(green_coords[0]) > 1000:
|
|
|
|
| 1451 |
indices = np.random.choice(len(green_coords[0]), 1000, replace=False)
|
| 1452 |
green_coords = (green_coords[0][indices], green_coords[1][indices])
|
| 1453 |
|
|
|
|
| 1456 |
# Sample person pixels
|
| 1457 |
person_coords = np.where(person_mask)
|
| 1458 |
if len(person_coords[0]) > 1000:
|
|
|
|
| 1459 |
indices = np.random.choice(len(person_coords[0]), 1000, replace=False)
|
| 1460 |
person_coords = (person_coords[0][indices], person_coords[1][indices])
|
| 1461 |
|
|
|
|
| 1467 |
logger.warning(f"Frame color analysis failed: {e}")
|
| 1468 |
return [], []
|
| 1469 |
|
| 1470 |
+
def _calculate_initial_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
|
| 1471 |
"""
|
| 1472 |
+
Calculate initial threshold based on color analysis.
|
| 1473 |
|
| 1474 |
Strategy:
|
| 1475 |
1. Find the "least green" green pixels (edge of green screen)
|
|
|
|
| 1484 |
person_lab = self._rgb_to_lab_batch(person_pixels)
|
| 1485 |
|
| 1486 |
# Use clustering to find representative colors
|
|
|
|
| 1487 |
kmeans_green = KMeans(n_clusters=min(5, len(green_lab)//10), random_state=42, n_init=10)
|
| 1488 |
green_clusters = kmeans_green.fit_predict(green_lab)
|
| 1489 |
green_centers = kmeans_green.cluster_centers_
|
|
|
|
| 1506 |
color_distance = np.linalg.norm(main_green_cluster - closest_person_cluster)
|
| 1507 |
|
| 1508 |
# Convert LAB distance to threshold
|
| 1509 |
+
# Start conservative - use 40% of distance for initial guess
|
| 1510 |
+
threshold = (color_distance / 100.0) * 0.4
|
|
|
|
| 1511 |
|
| 1512 |
+
logger.info(f"Color distance analysis: LAB distance={color_distance:.2f}, initial threshold={threshold:.3f}")
|
| 1513 |
|
| 1514 |
return threshold
|
| 1515 |
else:
|
|
|
|
| 1518 |
|
| 1519 |
except Exception as e:
|
| 1520 |
logger.warning(f"Threshold calculation failed: {e}")
|
|
|
|
| 1521 |
return self._simple_rgb_threshold(green_pixels, person_pixels)
|
| 1522 |
|
| 1523 |
def _simple_rgb_threshold(self, green_pixels: np.ndarray, person_pixels: np.ndarray) -> float:
|
|
|
|
| 1530 |
green_distances = np.linalg.norm(person_pixels - avg_green, axis=1)
|
| 1531 |
min_distance = np.min(green_distances)
|
| 1532 |
|
| 1533 |
+
# Use 50% of minimum distance as initial threshold
|
| 1534 |
+
threshold = min_distance * 0.5
|
| 1535 |
|
| 1536 |
logger.info(f"RGB fallback: min_distance={min_distance:.3f}, threshold={threshold:.3f}")
|
| 1537 |
return threshold
|
|
|
|
| 1627 |
|
| 1628 |
def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
|
| 1629 |
"""
|
| 1630 |
+
Factory function to create TwoStageProcessor with compatibility layer.
|
|
|
|
| 1631 |
|
| 1632 |
This provides a clean interface for integration with existing systems.
|
| 1633 |
"""
|
|
|
|
| 1643 |
config = ProcessingConfig()
|
| 1644 |
logger.info(f"Default config: {config.REFERENCE_FRAMES} reference frames, green={config.GREEN_COLOR}")
|
| 1645 |
logger.info(f"Auto-detection: {config.AUTO_DETECTION_FRAMES} analysis frames, fallback={config.AUTO_DETECTION_FALLBACK}")
|
| 1646 |
+
logger.info(f"Adaptive optimization: {config.ADAPTIVE_MAX_ITERATIONS} max iterations")
|
| 1647 |
logger.info(f"Chunked processing: {config.MAX_CHUNK_DURATION}s chunks with {config.CHUNK_OVERLAP_FRAMES} frame overlap")
|
| 1648 |
logger.info(f"Memory management: Cache clearing={'ON' if config.CLEAR_CACHE_AFTER_STAGE else 'OFF'}")
|
| 1649 |
logger.info(f"Memory testing: {'DISABLED' if not config.ENABLE_MEMORY_TESTING else 'ENABLED'}")
|