import os
import statistics
import uuid


def _mean_or_first(vals):
    """Return the mean of *vals*; 0.5 when empty; the first item if averaging fails."""
    if not vals:
        return 0.5
    try:
        return statistics.mean(vals)
    except (statistics.StatisticsError, TypeError):
        # Best-effort fallback kept from the original implementation.
        return vals[0]


def _index_face_data(face_data, source_width, source_height):
    """Build a ``{frame: [face_record, ...]}`` map from raw detection entries.

    Each entry is read via ``entry.get('frame')`` and ``entry.get('faces', [])``;
    each face is indexed as ``f[0]..f[3]`` (treated as x1, y1, x2, y2) with an
    optional ``f[4]`` used directly as the relative height.  Centres are
    normalised by the ``src_size`` of the first entry when it holds two positive
    numbers, otherwise by ``source_width`` / ``source_height``.
    """
    faces_per_frame = {}
    if not face_data:
        return faces_per_frame

    # Reference dimensions for normalisation; the media dimensions themselves
    # are never overwritten by the JSON metadata.
    coords_w, coords_h = source_width, source_height
    first_entry = face_data[0]
    if "src_size" in first_entry:
        try:
            w_json, h_json = first_entry["src_size"]
            if w_json > 0 and h_json > 0:
                coords_w, coords_h = w_json, h_json
                print(f"Coordinate System Reference: {coords_w}x{coords_h}")
        except (TypeError, ValueError):
            # Malformed "src_size" (wrong arity or non-numeric): fall back to
            # the media dimensions instead of aborting.
            pass

    print(f"Processing {len(face_data)} face entries for Dual-Track logic...")

    for entry in face_data:
        faces = entry.get('faces', [])
        if not faces:
            continue

        # Clamp the divisors to >= 1.0 so a zero/negative reference size
        # cannot cause a division by zero.
        denom_w = max(1.0, float(coords_w))
        denom_h = max(1.0, float(coords_h))

        records = []
        for f in faces:
            cx = (f[0] + f[2]) / 2.0
            cy = (f[1] + f[3]) / 2.0
            if len(f) > 4:
                rel_height = float(f[4])
            else:
                rel_height = (f[3] - f[1]) / denom_h
            records.append({
                'cx': cx,
                'cy': cy,
                'nx': cx / denom_w,
                'ny': cy / denom_h,
                'area': (f[2] - f[0]) * (f[3] - f[1]),
                'rh': rel_height,
            })
        faces_per_frame[entry.get('frame')] = records

    return faces_per_frame


def _plan_track_cuts(overlay_segments, faces_per_frame, fps, duration_frames):
    """Split the timeline into V1 (main) and V2 (secondary) cuts.

    Returns ``(cuts_v1, cuts_v2)``; every cut is a dict with ``start`` and
    ``end`` (frames) and ``center`` (normalised ``(x, y)`` of the tracked face).
    Gaps between segments reuse the previous segment's centres.
    """
    default_center = (0.5, 0.5)
    cuts_v1, cuts_v2 = [], []

    if not overlay_segments:
        cuts_v1.append({"start": 0, "end": duration_frames, "center": default_center})
        return cuts_v1, cuts_v2

    current_frame = 0
    last_center_v1 = default_center
    last_center_v2 = default_center
    is_last_dual = False

    for seg in sorted(overlay_segments, key=lambda s: s['start']):
        start_f = int(seg['start'] * fps)
        end_f = int(seg['end'] * fps)

        # Fill the gap before this segment with the previous framing.
        if start_f > current_frame:
            cuts_v1.append({"start": current_frame, "end": start_f, "center": last_center_v1})
            if is_last_dual:
                cuts_v2.append({"start": current_frame, "end": start_f, "center": last_center_v2})

        # Collect the detections that fall inside the segment.
        segment_faces = [
            faces_per_frame[frame]
            for frame in range(start_f, end_f)
            if frame in faces_per_frame
        ]
        frame_count = len(segment_faces)
        dual_face_frames = sum(1 for fs in segment_faces if len(fs) >= 2)

        # Dual-track when more than 30% of detected frames show two faces, or
        # when a short sample (< 15 frames) shows two faces at least once.
        is_dual_track = False
        if frame_count > 0:
            dual_ratio = dual_face_frames / frame_count
            is_dual_track = dual_ratio > 0.3 or (frame_count < 15 and dual_face_frames > 0)

        cand_v1_x, cand_v1_y = [], []
        cand_v2_x, cand_v2_y = [], []
        for fs in segment_faces:
            # Keep the two largest faces, ordered left to right.
            largest_two = sorted(fs, key=lambda face: face['area'], reverse=True)[:2]
            left_to_right = sorted(largest_two, key=lambda face: face['nx'])

            if is_dual_track and len(left_to_right) >= 2:
                f_left, f_right = left_to_right[0], left_to_right[-1]
                if abs(f_left['nx'] - f_right['nx']) < 0.20:
                    # Faces too close horizontally: treat the frame as single.
                    f_main = max(fs, key=lambda face: face['area'])
                    cand_v1_x.append(f_main['nx'])
                    cand_v1_y.append(f_main['ny'])
                else:
                    # Left face -> V2 (top), right face -> V1 (bottom).
                    cand_v2_x.append(f_left['nx'])
                    cand_v2_y.append(f_left['ny'])
                    cand_v1_x.append(f_right['nx'])
                    cand_v1_y.append(f_right['ny'])
            elif left_to_right:
                f_single = max(left_to_right, key=lambda face: face['area'])
                cand_v1_x.append(f_single['nx'])
                cand_v1_y.append(f_single['ny'])

        # Without any usable V2 candidate the segment reverts to single track.
        if is_dual_track and not cand_v2_x:
            is_dual_track = False

        center_v1 = last_center_v1
        if cand_v1_x:
            center_v1 = (_mean_or_first(cand_v1_x), _mean_or_first(cand_v1_y))

        cuts_v1.append({"start": start_f, "end": end_f, "center": center_v1})
        if is_dual_track:
            center_v2 = (_mean_or_first(cand_v2_x), _mean_or_first(cand_v2_y))
            cuts_v2.append({"start": start_f, "end": end_f, "center": center_v2})
            last_center_v2 = center_v2
        is_last_dual = is_dual_track

        last_center_v1 = center_v1
        current_frame = end_f

    # Trailing gap up to the end of the sequence (V1 only).
    if current_frame < duration_frames:
        cuts_v1.append({"start": current_frame, "end": duration_frames, "center": last_center_v1})

    return cuts_v1, cuts_v2


def create_premiere_xml(project_name, video_path, overlay_segments, duration_frames,
                        width=1080, height=1920, timebase=30,
                        video_file_id=None, audio_file_id=None, scale_value=100.0,
                        face_data=None, source_width=1920, source_height=1080):
    """
    Generates a Premiere Pro XML with segmented cuts, supporting Dual-Track
    (Split Screen) for multi-face scenarios.

    Args:
        project_name: Prefix of the sequence name (``"<project_name>_CutRef"``).
        video_path: Path of the source video; its basename labels the clips.
        overlay_segments: Iterable of dicts with ``start`` / ``end`` (seconds,
            multiplied by ``timebase``), ``index`` and ``path``.  Falsy means a
            single full-length cut.
        duration_frames: Sequence length in frames.
        width, height: Sequence dimensions used for positioning and scaling.
        timebase: Frames per second.
        video_file_id, audio_file_id: Optional ids; generated when falsy.
        scale_value: Accepted for compatibility; not read by this function.
        face_data: Optional per-frame face detections (see ``_index_face_data``).
        source_width, source_height: Dimensions of the source media.

    Returns:
        The assembled document as a string.

    NOTE(review): in this revision the template literals carry no XML markup
    and several computed values (file/clip ids, the video/overlay/audio track
    strings, ``sequence_uuid``) are not interpolated into the returned string.
    Presumably the markup was lost from the templates -- confirm against the
    intended template before relying on the output.
    """
    def get_uid():
        return str(uuid.uuid4())[:12]

    if not video_file_id:
        video_file_id = f"file-video-{get_uid()}"
    if not audio_file_id:
        audio_file_id = f"file-audio-{get_uid()}"
    sequence_uuid = str(uuid.uuid4())

    # --- PROCESS FACE DATA (Per Frame) ---
    faces_per_frame = _index_face_data(face_data, source_width, source_height)

    # Ensure source_width/height are floats for the calculations below.
    source_width = float(source_width)
    source_height = float(source_height)
    fps_float = float(timebase)

    # --- SEGMENTATION LOGIC ---
    cuts_v1, cuts_v2 = _plan_track_cuts(
        overlay_segments, faces_per_frame, fps_float, duration_frames
    )
    print(f"Generated {len(cuts_v1)} V1 cuts and {len(cuts_v2)} V2 cuts.")

    # helper for file blocks
    def get_file_block(fid, fpath, is_audio_only=False):
        audio_blk = "" if is_audio_only else "\n\n"
        width_f = int(source_width)
        height_f = int(source_height)
        return f"{os.path.basename(fpath)}{fpath}{timebase}FALSE{duration_frames}\n\n{audio_blk}"

    # --- GENERATE XML TRACKS ---
    # A V1 cut is rendered as the lower split pane when a V2 cut starts on the
    # same frame.
    dual_starts = {c['start'] for c in cuts_v2}

    def make_video_track(cuts_list, track_type="main"):
        # Scale is identical for every cut: always fill the sequence height.
        src_w = float(source_width)
        src_h = float(source_height)
        if src_h < 100:
            src_h = 1080.0  # Safety default
        final_scale = (float(height) / src_h) * 100.0
        if final_scale < 10.0:
            final_scale = 100.0
        s_val = final_scale / 100.0

        items = []
        for cut in cuts_list:
            seg_start, seg_end = cut['start'], cut['end']
            nx, ny = cut['center']  # Normalized source coords (0..1)
            if seg_end - seg_start <= 0:
                continue
            is_dual = seg_start in dual_starts

            # Face offset from the clip centre, in sequence pixels after scaling.
            off_x_seq = (nx - 0.5) * src_w * s_val
            off_y_seq = (ny - 0.5) * src_h * s_val

            # Where the face should land on screen.
            target_screen_x = 0.5 * width
            if track_type == "secondary":
                target_screen_y = 0.25 * height  # Top quarter
            elif track_type == "main" and is_dual:
                target_screen_y = 0.75 * height  # Bottom quarter
            else:
                target_screen_y = 0.5 * height

            # Shift the clip centre by -offset, then express it relative to
            # the sequence centre (absolute 0..1 maps to -0.5..0.5).
            pos_h = ((target_screen_x - off_x_seq) / float(width)) - 0.5
            pos_v = ((target_screen_y - off_y_seq) / float(height)) - 0.5

            seg_id = f"clipitem-video-{get_uid()}"

            basic_motion = (
                f"Basic MotionbasicmotionmotionvideoscaleScale{final_scale:.2f}"
                f"centerCenter{pos_h:.5f}{pos_v:.5f}"
            )

            # --- CROP LOGIC ---
            if track_type == "secondary":
                crop_xml = "CropcroptransformvideovideobottomBottom50.0"
            elif track_type == "main" and is_dual:
                crop_xml = "CropcroptransformvideovideotopTop50.0"
            else:
                crop_xml = ""

            items.append(
                f"{os.path.basename(video_path)}{duration_frames}{timebase}FALSE"
                f"{seg_start}{seg_end}{seg_start}{seg_end}"
                f"{get_file_block(video_file_id, video_path)}{basic_motion}{crop_xml}"
            )
        return "".join(items)

    track_v1 = make_video_track(cuts_v1, "main")
    track_v2 = make_video_track(cuts_v2, "secondary")

    # --- OVERLAY TRACK ---
    overlay_clips = []
    for seg in overlay_segments or []:
        start_f = int(seg['start'] * fps_float)
        end_f = int(seg['end'] * fps_float)
        clip_dur = end_f - start_f
        if clip_dur <= 0:
            continue

        ov_fid = f"file-ov-{seg['index']}-{get_uid()}"
        ov_cid = f"clip-ov-{seg['index']}-{get_uid()}"

        file_blk = f"{os.path.basename(seg['path'])}{seg['path']}{timebase}FALSE{clip_dur}\n\n"
        overlay_clips.append(
            f"{os.path.basename(seg['path'])}{clip_dur}{timebase}FALSE"
            f"{start_f}{end_f}0{clip_dur}{file_blk}normal"
        )
    track_overlay_block = "".join(overlay_clips)

    # --- ASSEMBLE ---
    timecode_block = f"{timebase}FALSE00:00:00:000NDF"

    audio_blk = (
        f"{os.path.basename(video_path)}{duration_frames}{timebase}FALSE0{duration_frames}"
        f"{get_file_block(video_file_id, video_path)}audio1"
    )

    return f"{project_name}_CutRef{duration_frames}{timebase}FALSE{timecode_block}\n\n"