Spaces:

RafaG
/

ViralCutterPRO

Running

App Files Files Community

ViralCutterPRO / scripts /export_xml_lib /xml_generator copy.py

RafaG

Upload 41 files

f7598da verified 6 days ago

raw

history blame contribute delete

18 kB

	import os
	import uuid
	import statistics

	def _xml_escape(value, attr=False):
	    """Return ``value`` as a string that is safe to embed in XML.

	    ``&``, ``<`` and ``>`` are always escaped. With ``attr=True`` the double
	    quote is escaped as well, so the result can be placed inside ``"..."``.
	    """
	    # Function-scope import keeps this block self-contained.
	    from xml.sax.saxutils import escape

	    return escape(str(value), {'"': "&quot;"} if attr else {})


	def _index_faces(face_data, source_width, source_height):
	    """Build a ``{frame: [face, ...]}`` lookup with normalised face centres.

	    Each entry of ``face_data`` is read through ``entry.get('frame')`` and
	    ``entry.get('faces')``; every face is indexed as ``[x1, y1, x2, y2, ...]``.
	    Elements past the fourth are ignored.  The stored records carry ``nx``/``ny``
	    (centre divided by the reference width/height) and ``area``.

	    The reference size is ``src_size`` of the first entry when it holds two
	    positive numbers, otherwise ``source_width`` x ``source_height``.
	    """
	    faces_per_frame = {}
	    if not face_data:
	        return faces_per_frame

	    coords_w, coords_h = source_width, source_height
	    first_entry = face_data[0]
	    if "src_size" in first_entry:
	        try:
	            w_json, h_json = first_entry["src_size"]
	            if w_json > 0 and h_json > 0:
	                coords_w, coords_h = w_json, h_json
	                print(f"Coordinate System Reference: {coords_w}x{coords_h}")
	        except (TypeError, ValueError):
	            # Malformed metadata: keep the media dimensions as reference.
	            pass

	    print(f"Processing {len(face_data)} face entries for Dual-Track logic...")

	    # max(1.0, ...) guards the divisions below against a zero reference size.
	    norm_w = max(1.0, float(coords_w))
	    norm_h = max(1.0, float(coords_h))

	    for entry in face_data:
	        faces = entry.get('faces', [])
	        if not faces:
	            continue
	        faces_per_frame[entry.get('frame')] = [
	            {
	                'nx': ((f[0] + f[2]) / 2.0) / norm_w,
	                'ny': ((f[1] + f[3]) / 2.0) / norm_h,
	                'area': (f[2] - f[0]) * (f[3] - f[1]),
	            }
	            for f in faces
	        ]
	    return faces_per_frame


	def _plan_cuts(overlay_segments, faces_per_frame, duration_frames, fps):
	    """Split the timeline into cuts for track V1 and track V2.

	    Returns ``(cuts_v1, cuts_v2)``; every cut is a dict with ``start``/``end``
	    (frames) and ``center`` (normalised ``(x, y)`` of the face to frame).
	    ``seg['start']`` and ``seg['end']`` are multiplied by ``fps`` to obtain
	    frame numbers.  Without segments a single centred V1 cut covers the whole
	    duration.
	    """
	    def area_of(face):
	        return face['area']

	    def x_of(face):
	        return face['nx']

	    cuts_v1, cuts_v2 = [], []
	    if not overlay_segments:
	        cuts_v1.append({"start": 0, "end": duration_frames, "center": (0.5, 0.5)})
	        return cuts_v1, cuts_v2

	    current_frame = 0
	    last_center_v1 = (0.5, 0.5)
	    last_center_v2 = (0.5, 0.5)
	    is_last_dual = False

	    for seg in sorted(overlay_segments, key=lambda s: s['start']):
	        start_f = int(seg['start'] * fps)
	        end_f = int(seg['end'] * fps)

	        # Fill the gap before this segment with the previous framing.
	        if start_f > current_frame:
	            cuts_v1.append({"start": current_frame, "end": start_f, "center": last_center_v1})
	            if is_last_dual:
	                cuts_v2.append({"start": current_frame, "end": start_f, "center": last_center_v2})

	        # NOTE(review): only frames that have detection data are counted, so
	        # the ratio below is "dual frames / detected frames" - confirm that this
	        # is the intended denominator.
	        sampled = [faces_per_frame[i] for i in range(start_f, end_f) if i in faces_per_frame]
	        frame_count = len(sampled)
	        dual_face_frames = sum(1 for fs in sampled if len(fs) >= 2)

	        is_dual_track = False
	        if frame_count > 0:
	            if dual_face_frames / frame_count > 0.3:
	                is_dual_track = True
	            elif frame_count < 15 and dual_face_frames > 0:
	                is_dual_track = True

	        cand_v1_x, cand_v1_y = [], []
	        cand_v2_x, cand_v2_y = [], []
	        for fs in sampled:
	            # Keep the two largest faces, then order them left to right.
	            top_two = sorted(fs, key=area_of, reverse=True)[:2]
	            by_x = sorted(top_two, key=x_of)

	            if is_dual_track and len(by_x) >= 2:
	                f_left, f_right = by_x[0], by_x[-1]
	                if abs(f_left['nx'] - f_right['nx']) < 0.20:
	                    # Faces too close horizontally: treat the frame as single.
	                    f_main = max(fs, key=area_of)
	                    cand_v1_x.append(f_main['nx'])
	                    cand_v1_y.append(f_main['ny'])
	                else:
	                    # Left face -> V2 (top pane), right face -> V1 (bottom pane).
	                    cand_v2_x.append(f_left['nx'])
	                    cand_v2_y.append(f_left['ny'])
	                    cand_v1_x.append(f_right['nx'])
	                    cand_v1_y.append(f_right['ny'])
	            elif by_x:
	                f_main = max(by_x, key=area_of)
	                cand_v1_x.append(f_main['nx'])
	                cand_v1_y.append(f_main['ny'])

	        # No usable second face after filtering: revert to single track.
	        if is_dual_track and not cand_v2_x:
	            is_dual_track = False

	        center_v1 = last_center_v1
	        if cand_v1_x:
	            center_v1 = (statistics.mean(cand_v1_x), statistics.mean(cand_v1_y))

	        cuts_v1.append({"start": start_f, "end": end_f, "center": center_v1})
	        if is_dual_track:
	            center_v2 = (statistics.mean(cand_v2_x), statistics.mean(cand_v2_y))
	            cuts_v2.append({"start": start_f, "end": end_f, "center": center_v2})
	            last_center_v2 = center_v2

	        is_last_dual = is_dual_track
	        last_center_v1 = center_v1
	        current_frame = end_f

	    # NOTE(review): the trailing gap is emitted on V1 only, even when the last
	    # segment was dual - confirm this asymmetry with the in-between gaps is
	    # intended.
	    if current_frame < duration_frames:
	        cuts_v1.append({"start": current_frame, "end": duration_frames, "center": last_center_v1})

	    return cuts_v1, cuts_v2


	def create_premiere_xml(project_name, video_path, overlay_segments, duration_frames, width=1080, height=1920, timebase=30, video_file_id=None, audio_file_id=None, scale_value=100.0, face_data=None, source_width=1920, source_height=1080):
	    """
	    Generates a Premiere Pro XML with segmented cuts, supporting Dual-Track (Split Screen) for multi-face scenarios.

	    Args:
	        project_name: Used for the sequence name (``<name>{project_name}_CutRef</name>``).
	        video_path: Path of the source media; written to ``<pathurl>`` and its
	            basename to ``<name>``.
	        overlay_segments: Iterable of dicts providing ``start``, ``end``
	            (multiplied by ``timebase`` to get frames), ``index`` and ``path``.
	            May be empty or ``None``.
	        duration_frames: Sequence and source duration, in frames.
	        width, height: Sequence dimensions in pixels.
	        timebase: Frame rate written to every ``<rate>`` element.
	        video_file_id, audio_file_id: Optional ids; random ones are generated
	            when falsy.
	        scale_value: Accepted for backward compatibility; not used here.
	        face_data: Optional list of per-frame face detections (see
	            ``_index_faces``).
	        source_width, source_height: Pixel size of the source media.

	    Returns:
	        The complete ``xmeml`` document as a single string.  All text and
	        attribute values coming from the arguments are XML-escaped.
	    """
	    def get_uid():
	        return str(uuid.uuid4())[:12]

	    if not video_file_id:
	        video_file_id = f"file-video-{get_uid()}"
	    if not audio_file_id:
	        audio_file_id = f"file-audio-{get_uid()}"
	    sequence_uuid = str(uuid.uuid4())

	    # Fragments shared by many elements; built once.
	    rate_xml = f"<rate><timebase>{timebase}</timebase><ntsc>FALSE</ntsc></rate>"
	    clip_name = _xml_escape(os.path.basename(video_path))
	    source_file_xml = (
	        f'<file id="{_xml_escape(video_file_id, attr=True)}"><name>{clip_name}</name>'
	        f"<pathurl>{_xml_escape(video_path)}</pathurl>{rate_xml}"
	        f"<duration>{duration_frames}</duration><media><video><samplecharacteristics>"
	        f"<width>{int(source_width)}</width><height>{int(source_height)}</height>"
	        "<alpha>straight</alpha></samplecharacteristics></video>"
	        "<audio><samplecharacteristics><depth>16</depth><samplerate>48000</samplerate>"
	        "</samplecharacteristics><channelcount>2</channelcount></audio></media></file>"
	    )

	    # --- FACE DATA + SEGMENTATION ---
	    faces_per_frame = _index_faces(face_data, source_width, source_height)
	    cuts_v1, cuts_v2 = _plan_cuts(overlay_segments, faces_per_frame, duration_frames, float(timebase))
	    print(f"Generated {len(cuts_v1)} V1 cuts and {len(cuts_v2)} V2 cuts.")

	    # --- SCALE (identical for every cut) ---
	    src_w = float(source_width)
	    src_h = float(source_height)
	    if src_h < 100:
	        src_h = 1080.0  # Safety default
	    # Always scale the source so that it fills the sequence height.
	    final_scale = (float(height) / src_h) * 100.0
	    if final_scale < 10.0:
	        final_scale = 100.0
	    s_val = final_scale / 100.0

	    # A V1 cut is rendered in split-screen mode when V2 has a cut starting on
	    # the same frame.
	    dual_starts = {cut["start"] for cut in cuts_v2}

	    def crop_filter(edge):
	        # Crops 50% from the given edge ("top" or "bottom").
	        return (
	            "<filter><effect><name>Crop</name><effectid>crop</effectid>"
	            "<effectcategory>transform</effectcategory><effecttype>video</effecttype>"
	            '<mediatype>video</mediatype><parameter authoringApp="PremierePro">'
	            f"<parameterid>{edge}</parameterid><name>{edge.capitalize()}</name>"
	            "<value>50.0</value></parameter></effect></filter>"
	        )

	    def make_video_track(cuts_list, track_type="main"):
	        clips = []
	        for cut in cuts_list:
	            seg_start, seg_end = cut["start"], cut["end"]
	            if seg_end - seg_start <= 0:
	                continue
	            nx, ny = cut["center"]  # normalised source coords (0..1)
	            is_dual = seg_start in dual_starts

	            # Where on screen the face should land, and which half to crop.
	            if track_type == "secondary":
	                target_screen_y = 0.25 * height  # top quarter
	                crop_xml = crop_filter("bottom")
	            elif track_type == "main" and is_dual:
	                target_screen_y = 0.75 * height  # bottom quarter
	                crop_xml = crop_filter("top")
	            else:
	                target_screen_y = 0.5 * height
	                crop_xml = ""
	            target_screen_x = 0.5 * width

	            # Face offset from the clip centre, in sequence pixels after scaling.
	            off_x_seq = (nx - 0.5) * src_w * s_val
	            off_y_seq = (ny - 0.5) * src_h * s_val

	            # Shift the clip centre by -offset, then express it relative to the
	            # sequence centre as a fraction of the sequence size.
	            pos_h = ((target_screen_x - off_x_seq) / float(width)) - 0.5
	            pos_v = ((target_screen_y - off_y_seq) / float(height)) - 0.5

	            basic_motion = (
	                "<filter><effect><name>Basic Motion</name><effectid>basic</effectid>"
	                "<effectcategory>motion</effectcategory><effecttype>motion</effecttype>"
	                '<mediatype>video</mediatype><parameter authoringApp="PremierePro">'
	                f"<parameterid>scale</parameterid><name>Scale</name><value>{final_scale:.2f}</value>"
	                '</parameter><parameter authoringApp="PremierePro">'
	                "<parameterid>center</parameterid><name>Center</name>"
	                f"<value><horiz>{pos_h:.5f}</horiz><vert>{pos_v:.5f}</vert></value>"
	                "</parameter></effect></filter>"
	            )

	            clips.append(
	                f'<clipitem id="clipitem-video-{get_uid()}"><name>{clip_name}</name>'
	                f"<duration>{duration_frames}</duration>{rate_xml}"
	                f"<start>{seg_start}</start><end>{seg_end}</end>"
	                f"<in>{seg_start}</in><out>{seg_end}</out>"
	                f"{source_file_xml}{basic_motion}{crop_xml}</clipitem>"
	            )
	        return f"<track>{''.join(clips)}</track>"

	    track_v1 = make_video_track(cuts_v1, "main")
	    track_v2 = make_video_track(cuts_v2, "secondary")

	    # --- OVERLAY TRACK ---
	    fps_float = float(timebase)
	    overlay_clips = []
	    for seg in overlay_segments or []:
	        start_f = int(seg['start'] * fps_float)
	        end_f = int(seg['end'] * fps_float)
	        clip_dur = end_f - start_f
	        if clip_dur <= 0:
	            continue
	        seg_index = _xml_escape(seg['index'], attr=True)
	        ov_fid = f"file-ov-{seg_index}-{get_uid()}"
	        ov_cid = f"clip-ov-{seg_index}-{get_uid()}"
	        ov_name = _xml_escape(os.path.basename(seg['path']))
	        file_blk = (
	            f'<file id="{ov_fid}"><name>{ov_name}</name>'
	            f"<pathurl>{_xml_escape(seg['path'])}</pathurl>{rate_xml}"
	            f"<duration>{clip_dur}</duration><media><video><samplecharacteristics>"
	            f"<width>{width}</width><height>{height}</height><alpha>straight</alpha>"
	            "</samplecharacteristics></video></media></file>"
	        )
	        overlay_clips.append(
	            f'<clipitem id="{ov_cid}"><name>{ov_name}</name><duration>{clip_dur}</duration>'
	            f"{rate_xml}<start>{start_f}</start><end>{end_f}</end><in>0</in><out>{clip_dur}</out>"
	            f"{file_blk}<compositemode>normal</compositemode></clipitem>"
	        )
	    track_overlay_block = f"<track>{''.join(overlay_clips)}</track>"

	    # --- ASSEMBLE ---
	    timecode_block = (
	        f"<timecode>{rate_xml}<string>00:00:00:00</string><frame>0</frame>"
	        "<displayformat>NDF</displayformat></timecode>"
	    )
	    audio_blk = (
	        f'<track><clipitem id="{_xml_escape(audio_file_id, attr=True)}"><name>{clip_name}</name>'
	        f"<duration>{duration_frames}</duration>{rate_xml}"
	        f"<start>0</start><end>{duration_frames}</end>{source_file_xml}"
	        "<sourcetrack><mediatype>audio</mediatype><trackindex>1</trackindex></sourcetrack>"
	        "</clipitem></track>"
	    )

	    return (
	        '<?xml version="1.0" encoding="UTF-8"?><xmeml version="4">'
	        f'<sequence id="{sequence_uuid}"><name>{_xml_escape(project_name)}_CutRef</name>'
	        f"<duration>{duration_frames}</duration>{rate_xml}{timecode_block}"
	        f"<media><video><format><samplecharacteristics>{rate_xml}"
	        f"<width>{width}</width><height>{height}</height>"
	        "<pixelaspectratio>square</pixelaspectratio></samplecharacteristics></format>"
	        f"{track_v1}{track_v2}{track_overlay_block}</video>"
	        f"<audio>{audio_blk}</audio></media></sequence></xmeml>"
	    )