Spaces:
Running
Running
| import os | |
| import uuid | |
| import statistics | |
def create_premiere_xml(
    project_name,
    video_path,
    overlay_segments,
    duration_frames,
    width=1080,
    height=1920,
    timebase=30,
    video_file_id=None,
    audio_file_id=None,
    scale_value=100.0,
    face_data=None,
    source_width=1920,
    source_height=1080,
):
    """Generate a Premiere Pro (xmeml v4) sequence with face-centred cuts.

    The source video is cut at the overlay segment boundaries. For every
    segment the detected faces are averaged to find a framing centre. When a
    segment predominantly shows two well-separated faces, a second video
    track is emitted so the frame is split: the left face goes to the top
    half (track V2, bottom 50% cropped) and the right face to the bottom half
    (track V1, top 50% cropped). Every clip is scaled so the source height
    fills the sequence height.

    Args:
        project_name: Used for the sequence name (``<name>_CutRef``).
        video_path: Path of the source video; written to ``<pathurl>``.
        overlay_segments: Iterable of dicts with ``'start'`` and ``'end'``
            (multiplied by ``timebase`` to obtain frames), ``'path'`` and
            ``'index'``. May be empty/None, in which case a single
            full-length centred cut is produced.
        duration_frames: Total length of the sequence in frames.
        width: Sequence width in pixels.
        height: Sequence height in pixels.
        timebase: Frames per second of the sequence.
        video_file_id: Optional ``<file>`` id; generated when falsy.
        audio_file_id: Optional id for the audio clip item; generated when
            falsy.
        scale_value: Accepted for backward compatibility; not used here.
        face_data: Optional list of dicts with ``'frame'`` (frame index) and
            ``'faces'`` (sequences ``x1, y1, x2, y2[, relative_height]``).
            If the first entry carries ``'src_size'`` (``w, h``), the face
            coordinates are normalised against it instead of the source
            dimensions.
        source_width: Pixel width of the source media.
        source_height: Pixel height of the source media.

    Returns:
        The complete XML document as a string.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    def xml_text(value):
        """Escape a value for use as XML element text."""
        return escape(str(value))

    def xml_attr(value):
        """Escape a value for use inside a double-quoted XML attribute."""
        return escape(str(value), {'"': "&quot;"})

    def get_uid():
        return str(uuid.uuid4())[:12]

    if not video_file_id:
        video_file_id = f"file-video-{get_uid()}"
    if not audio_file_id:
        audio_file_id = f"file-audio-{get_uid()}"
    sequence_uuid = str(uuid.uuid4())

    # Shared fragments reused by every clip / file element.
    rate_xml = f"<rate><timebase>{timebase}</timebase><ntsc>FALSE</ntsc></rate>"
    video_name = xml_text(os.path.basename(video_path))

    def get_file_block(fid, fpath):
        """Return the <file> element describing the source media."""
        return (
            f'<file id="{xml_attr(fid)}">'
            f"<name>{xml_text(os.path.basename(fpath))}</name>"
            f"<pathurl>{xml_text(fpath)}</pathurl>"
            f"{rate_xml}"
            f"<duration>{duration_frames}</duration>"
            "<media><video><samplecharacteristics>"
            f"<width>{int(source_width)}</width><height>{int(source_height)}</height>"
            "<alpha>straight</alpha></samplecharacteristics></video>"
            "<audio><samplecharacteristics><depth>16</depth><samplerate>48000</samplerate>"
            "</samplecharacteristics><channelcount>2</channelcount></audio>"
            "</media></file>"
        )

    # --- PROCESS FACE DATA (per frame) ---
    # frame index -> list of face dicts with normalised centre, area and
    # relative height.
    faces_per_frame = {}

    # Dimensions the face coordinates refer to. These may differ from the
    # actual media dimensions, which are deliberately left untouched.
    coords_w, coords_h = source_width, source_height

    if face_data:
        first_entry = face_data[0]
        if "src_size" in first_entry:
            try:
                w_json, h_json = first_entry["src_size"]
                if w_json > 0 and h_json > 0:
                    coords_w, coords_h = w_json, h_json
                    print(f"Coordinate System Reference: {coords_w}x{coords_h}")
            except (TypeError, ValueError):
                # Malformed metadata: keep using the source dimensions.
                print("Ignoring malformed src_size metadata in face data.")

        print(f"Processing {len(face_data)} face entries for Dual-Track logic...")

        norm_w = max(1.0, float(coords_w))
        norm_h = max(1.0, float(coords_h))
        for entry in face_data:
            f_idx = entry.get('frame')
            faces = entry.get('faces', [])
            # Entries without a frame index can never be matched to a segment.
            if f_idx is None or not faces:
                continue
            processed_faces = []
            for f in faces:
                cx = (f[0] + f[2]) / 2.0
                cy = (f[1] + f[3]) / 2.0
                # A fifth element, when present, is taken as the relative
                # face height; otherwise it is derived from the box.
                if len(f) > 4:
                    rh_val = float(f[4])
                else:
                    rh_val = (f[3] - f[1]) / norm_h
                processed_faces.append({
                    'cx': cx,
                    'cy': cy,
                    'nx': cx / norm_w,
                    'ny': cy / norm_h,
                    'area': (f[2] - f[0]) * (f[3] - f[1]),
                    'rh': rh_val,
                })
            faces_per_frame[f_idx] = processed_faces

    source_width = float(source_width)
    source_height = float(source_height)

    def mean_or_default(vals):
        """Average of vals; 0.5 (frame centre) when there are none."""
        if not vals:
            return 0.5
        try:
            return statistics.mean(vals)
        except (TypeError, statistics.StatisticsError):
            return vals[0]

    # --- SEGMENTATION LOGIC ---
    cuts_v1 = []  # Track 1 (main / bottom half when split)
    cuts_v2 = []  # Track 2 (secondary / top half when split)
    fps_float = float(timebase)

    if overlay_segments:
        current_frame = 0
        last_center_v1 = (0.5, 0.5)
        last_center_v2 = (0.5, 0.5)
        is_last_dual = False

        for seg in sorted(overlay_segments, key=lambda s: s['start']):
            start_f = int(seg['start'] * fps_float)
            end_f = int(seg['end'] * fps_float)

            # Fill the gap before this segment with the previous framing.
            if start_f > current_frame:
                cuts_v1.append({"start": current_frame, "end": start_f, "center": last_center_v1})
                if is_last_dual:
                    cuts_v2.append({"start": current_frame, "end": start_f, "center": last_center_v2})

            # Collect the face lists of all frames in the segment that have
            # detection data, counting how many of them show 2+ faces.
            segment_faces = []
            frame_count = 0
            dual_face_frames = 0
            for f_idx in range(start_f, end_f):
                if f_idx in faces_per_frame:
                    fs = faces_per_frame[f_idx]
                    segment_faces.append(fs)
                    if len(fs) >= 2:
                        dual_face_frames += 1
                    frame_count += 1

            # Split screen when >30% of the analysed frames show two faces,
            # or when there are few samples and at least one shows two.
            is_dual_track = False
            if frame_count > 0:
                if dual_face_frames / frame_count > 0.3:
                    is_dual_track = True
                elif frame_count < 15 and dual_face_frames > 0:
                    is_dual_track = True

            center_v1 = last_center_v1
            center_v2 = last_center_v2

            cand_v1_x, cand_v1_y = [], []
            cand_v2_x, cand_v2_y = [], []

            for fs in segment_faces:
                # Keep the two largest faces, ordered left to right.
                top_faces = sorted(fs, key=lambda face: face['area'], reverse=True)[:2]
                fs_sorted = sorted(top_faces, key=lambda face: face['nx'])

                if is_dual_track and len(fs_sorted) >= 2:
                    f_left = fs_sorted[0]
                    f_right = fs_sorted[-1]
                    if abs(f_left['nx'] - f_right['nx']) < 0.20:
                        # Faces too close horizontally: treat as one subject.
                        f_main = max(fs, key=lambda face: face['area'])
                        cand_v1_x.append(f_main['nx'])
                        cand_v1_y.append(f_main['ny'])
                    else:
                        # Left face -> V2 (top), right face -> V1 (bottom).
                        cand_v2_x.append(f_left['nx'])
                        cand_v2_y.append(f_left['ny'])
                        cand_v1_x.append(f_right['nx'])
                        cand_v1_y.append(f_right['ny'])
                elif fs_sorted:
                    f1 = max(fs_sorted, key=lambda face: face['area'])
                    cand_v1_x.append(f1['nx'])
                    cand_v1_y.append(f1['ny'])

            # No usable second face after filtering: fall back to one track.
            if is_dual_track and not cand_v2_x:
                is_dual_track = False

            if cand_v1_x:
                center_v1 = (mean_or_default(cand_v1_x), mean_or_default(cand_v1_y))
            if is_dual_track:
                center_v2 = (mean_or_default(cand_v2_x), mean_or_default(cand_v2_y))

            cuts_v1.append({"start": start_f, "end": end_f, "center": center_v1})
            if is_dual_track:
                cuts_v2.append({"start": start_f, "end": end_f, "center": center_v2})
                last_center_v2 = center_v2
            is_last_dual = is_dual_track

            last_center_v1 = center_v1
            current_frame = end_f

        # Final gap up to the end of the sequence.
        if current_frame < duration_frames:
            cuts_v1.append({"start": current_frame, "end": duration_frames, "center": last_center_v1})
    else:
        cuts_v1.append({"start": 0, "end": duration_frames, "center": (0.5, 0.5)})

    print(f"Generated {len(cuts_v1)} V1 cuts and {len(cuts_v2)} V2 cuts.")

    # --- GENERATE XML TRACKS ---
    # A V1 cut is part of a split screen when a V2 cut starts on the same frame.
    dual_starts = {c['start'] for c in cuts_v2}

    def crop_filter(param_id, label):
        """Return a Crop filter removing 50% from the given edge."""
        return (
            "<filter><effect><name>Crop</name><effectid>crop</effectid>"
            "<effectcategory>transform</effectcategory><effecttype>video</effecttype>"
            "<mediatype>video</mediatype>"
            f'<parameter authoringApp="PremierePro"><parameterid>{param_id}</parameterid>'
            f"<name>{label}</name><value>50.0</value></parameter></effect></filter>"
        )

    def make_video_track(cuts_list, track_type="main"):
        """Render one <track> of clip items for the given cuts."""
        src_w = float(source_width)
        src_h = float(source_height)
        if src_h < 100:
            src_h = 1080.0  # Safety default

        # Always scale so the source height fills the sequence height.
        final_scale = (float(height) / src_h) * 100.0
        if final_scale < 10.0:
            final_scale = 100.0
        s_val = final_scale / 100.0

        items = []
        for cut in cuts_list:
            seg_start, seg_end = cut['start'], cut['end']
            nx, ny = cut['center']  # normalised source coords (0..1)
            if seg_end - seg_start <= 0:
                continue
            is_dual = seg_start in dual_starts

            # Where on screen the face should land, plus the matching crop.
            target_screen_x = 0.5 * width
            if track_type == "secondary":
                target_screen_y = 0.25 * height  # top quarter
                crop_xml = crop_filter("bottom", "Bottom")
            elif track_type == "main" and is_dual:
                target_screen_y = 0.75 * height  # bottom quarter
                crop_xml = crop_filter("top", "Top")
            else:
                target_screen_y = 0.5 * height
                crop_xml = ""

            # Face offset from the clip centre, in sequence pixels after scale.
            off_x_seq = (nx - 0.5) * src_w * s_val
            off_y_seq = (ny - 0.5) * src_h * s_val

            # Shift the clip centre so the face sits on the target position.
            req_center_x = target_screen_x - off_x_seq
            req_center_y = target_screen_y - off_y_seq

            # The XML "center" value is relative to the sequence centre:
            # absolute 0..1 maps to -0.5..0.5.
            pos_h = (req_center_x / float(width)) - 0.5
            pos_v = (req_center_y / float(height)) - 0.5

            seg_id = f"clipitem-video-{get_uid()}"
            # No anchor point (centerOffset) is written, so the default
            # (centre of the clip) applies, matching the maths above.
            basic_motion = (
                "<filter><effect><name>Basic Motion</name><effectid>basic</effectid>"
                "<effectcategory>motion</effectcategory><effecttype>motion</effecttype>"
                "<mediatype>video</mediatype>"
                '<parameter authoringApp="PremierePro"><parameterid>scale</parameterid>'
                f"<name>Scale</name><value>{final_scale:.2f}</value></parameter>"
                '<parameter authoringApp="PremierePro"><parameterid>center</parameterid>'
                f"<name>Center</name><value><horiz>{pos_h:.5f}</horiz>"
                f"<vert>{pos_v:.5f}</vert></value></parameter></effect></filter>"
            )

            items.append(
                f'<clipitem id="{seg_id}"><name>{video_name}</name>'
                f"<duration>{duration_frames}</duration>{rate_xml}"
                f"<start>{seg_start}</start><end>{seg_end}</end>"
                f"<in>{seg_start}</in><out>{seg_end}</out>"
                f"{get_file_block(video_file_id, video_path)}{basic_motion}{crop_xml}</clipitem>"
            )
        return f"<track>{''.join(items)}</track>"

    track_v1 = make_video_track(cuts_v1, "main")
    track_v2 = make_video_track(cuts_v2, "secondary")

    # --- OVERLAY TRACK ---
    overlay_clips = []
    for seg in overlay_segments or ():
        start_f = int(seg['start'] * fps_float)
        end_f = int(seg['end'] * fps_float)
        clip_dur = end_f - start_f
        if clip_dur <= 0:
            continue

        ov_name = xml_text(os.path.basename(seg['path']))
        ov_fid = xml_attr(f"file-ov-{seg['index']}-{get_uid()}")
        ov_cid = xml_attr(f"clip-ov-{seg['index']}-{get_uid()}")
        file_blk = (
            f'<file id="{ov_fid}"><name>{ov_name}</name>'
            f"<pathurl>{xml_text(seg['path'])}</pathurl>{rate_xml}"
            f"<duration>{clip_dur}</duration><media><video><samplecharacteristics>"
            f"<width>{width}</width><height>{height}</height><alpha>straight</alpha>"
            "</samplecharacteristics></video></media></file>"
        )
        overlay_clips.append(
            f'<clipitem id="{ov_cid}"><name>{ov_name}</name>'
            f"<duration>{clip_dur}</duration>{rate_xml}"
            f"<start>{start_f}</start><end>{end_f}</end><in>0</in><out>{clip_dur}</out>"
            f"{file_blk}<compositemode>normal</compositemode></clipitem>"
        )
    track_overlay_block = f"<track>{''.join(overlay_clips)}</track>"

    # --- ASSEMBLE ---
    timecode_block = (
        f"<timecode>{rate_xml}<string>00:00:00:00</string><frame>0</frame>"
        "<displayformat>NDF</displayformat></timecode>"
    )
    audio_blk = (
        f'<track><clipitem id="{xml_attr(audio_file_id)}"><name>{video_name}</name>'
        f"<duration>{duration_frames}</duration>{rate_xml}"
        f"<start>0</start><end>{duration_frames}</end>"
        f"{get_file_block(video_file_id, video_path)}"
        "<sourcetrack><mediatype>audio</mediatype><trackindex>1</trackindex></sourcetrack>"
        "</clipitem></track>"
    )
    return (
        '<?xml version="1.0" encoding="UTF-8"?><xmeml version="4">'
        f'<sequence id="{sequence_uuid}"><name>{xml_text(project_name)}_CutRef</name>'
        f"<duration>{duration_frames}</duration>{rate_xml}{timecode_block}"
        f"<media><video><format><samplecharacteristics>{rate_xml}"
        f"<width>{width}</width><height>{height}</height>"
        "<pixelaspectratio>square</pixelaspectratio></samplecharacteristics></format>"
        f"{track_v1}{track_v2}{track_overlay_block}</video>"
        f"<audio>{audio_blk}</audio></media></sequence></xmeml>"
    )