File size: 20,038 Bytes
f7598da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
import os
import uuid
import statistics

def create_premiere_xml(project_name, video_path, overlay_segments, duration_frames, width=1080, height=1920, timebase=30, video_file_id=None, audio_file_id=None, scale_value=100.0, face_data=None, source_width=1920, source_height=1080):
    """Generate a Premiere Pro XML (xmeml v4) sequence with segmented cuts.

    The source video is cut at every overlay segment boundary. Each cut gets a
    "Basic Motion" filter that scales the clip to the sequence height and
    shifts it so the tracked face sits at the centre of its pane. When a
    segment mostly shows two faces the cut is duplicated on a second video
    track (split screen): the left-most face goes to the top pane (track 2),
    the right-most face to the bottom pane (track 1), and a "Crop" filter
    trims each clip to its pane.

    Args:
        project_name: Used for the sequence name (``<project_name>_CutRef``).
        video_path: Path of the source video; written verbatim (XML-escaped)
            into ``<pathurl>`` and its basename into ``<name>``.
        overlay_segments: Iterable of dicts with ``start``/``end`` (multiplied
            by ``timebase`` to obtain frame numbers), ``index`` and ``path``.
            May be empty/None, in which case a single full-length cut is made.
        duration_frames: Length of the source/sequence in frames.
        width: Sequence width in pixels.
        height: Sequence height in pixels.
        timebase: Frames per second written into every ``<rate>`` element.
        video_file_id: Optional id for the source ``<file>`` element; a random
            one is generated when falsy.
        audio_file_id: Optional id for the audio clip item; a random one is
            generated when falsy.
        scale_value: Currently unused; kept for backward compatibility.
        face_data: Optional list of dicts with ``frame`` and ``faces``. Each
            face is indexed as ``[x1, y1, x2, y2]`` with an optional fifth
            element read as a relative height. The first entry may carry
            ``src_size`` = ``(w, h)`` giving the coordinate system the boxes
            were detected in.
        source_width: Pixel width of the source media.
        source_height: Pixel height of the source media.

    Returns:
        The complete XML document as a string.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    def get_uid():
        return str(uuid.uuid4())[:12]

    def xml_text(value):
        # Escape &, < and > so arbitrary names/paths yield well-formed XML.
        return escape(str(value))

    def xml_attr(value):
        # Attribute values are double-quoted, so quotes must be escaped too.
        return escape(str(value), {'"': "&quot;"})

    if not video_file_id:
        video_file_id = f"file-video-{get_uid()}"
    if not audio_file_id:
        audio_file_id = f"file-audio-{get_uid()}"
    sequence_uuid = str(uuid.uuid4())

    # --- PROCESS FACE DATA (Per Frame) ---
    # frame index -> list of face dicts with pixel and normalized centres.
    faces_per_frame = {}

    # Dimensions used to normalize face coordinates. They default to the
    # source media size and are overridden by "src_size" metadata when valid.
    # source_width/source_height themselves are never overwritten by it.
    coords_w = source_width
    coords_h = source_height

    if face_data:
        first_entry = face_data[0]
        if "src_size" in first_entry:
            try:
                w_json, h_json = first_entry["src_size"]
                if w_json > 0 and h_json > 0:
                    coords_w = w_json
                    coords_h = h_json
                    print(f"Coordinate System Reference: {coords_w}x{coords_h}")
            except (TypeError, ValueError):
                # Malformed metadata: keep normalizing against the source size.
                pass

        print(f"Processing {len(face_data)} face entries for Dual-Track logic...")
        norm_w = max(1.0, float(coords_w))
        norm_h = max(1.0, float(coords_h))
        for entry in face_data:
            f_idx = entry.get('frame')
            faces = entry.get('faces', [])
            if not faces:
                continue

            processed_faces = []
            for f in faces:
                cx = (f[0] + f[2]) / 2.0
                cy = (f[1] + f[3]) / 2.0
                # Relative height: explicit fifth element wins, otherwise it
                # is derived from the box height.
                if len(f) > 4:
                    rh_val = float(f[4])
                else:
                    rh_val = (f[3] - f[1]) / norm_h
                processed_faces.append({
                    'cx': cx,
                    'cy': cy,
                    'nx': cx / norm_w,  # 0..1 relative to the detection frame
                    'ny': cy / norm_h,
                    'area': (f[2] - f[0]) * (f[3] - f[1]),
                    'rh': rh_val,
                })

            faces_per_frame[f_idx] = processed_faces

    # Floats from here on for the layout arithmetic.
    source_width = float(source_width)
    source_height = float(source_height)

    # --- SEGMENTATION LOGIC ---
    cuts_v1 = []  # Track 1 (main / bottom pane when split)
    cuts_v2 = []  # Track 2 (secondary / top pane)

    fps_float = float(timebase)

    def area_of(face):
        return face['area']

    def average(vals):
        # Callers only pass non-empty lists; the guards are purely defensive.
        if not vals:
            return 0.5
        try:
            return statistics.mean(vals)
        except (statistics.StatisticsError, TypeError):
            return vals[0]

    if overlay_segments:
        current_frame = 0

        # Normalized centres carried over into gaps and face-less segments.
        last_center_v1 = (0.5, 0.5)
        last_center_v2 = (0.5, 0.5)
        is_last_dual = False

        for seg in sorted(overlay_segments, key=lambda s: s['start']):
            start_f = int(seg['start'] * fps_float)
            end_f = int(seg['end'] * fps_float)

            # Fill the gap before this segment with the previous framing; the
            # split layout is kept only if the previous segment was split.
            if start_f > current_frame:
                cuts_v1.append({"start": current_frame, "end": start_f, "center": last_center_v1})
                if is_last_dual:
                    cuts_v2.append({"start": current_frame, "end": start_f, "center": last_center_v2})

            # Collect the detections that fall inside this segment.
            segment_faces = [
                faces_per_frame[f_idx]
                for f_idx in range(start_f, end_f)
                if f_idx in faces_per_frame
            ]
            frame_count = max(0, end_f - start_f)
            dual_face_frames = sum(1 for fs in segment_faces if len(fs) >= 2)

            # Split screen when >30% of frames show two faces, or when a very
            # short segment (<15 frames) shows two faces at all.
            is_dual_track = frame_count > 0 and (
                dual_face_frames / frame_count > 0.3
                or (frame_count < 15 and dual_face_frames > 0)
            )

            center_v1 = last_center_v1
            center_v2 = last_center_v2

            cand_v1_x, cand_v1_y = [], []
            cand_v2_x, cand_v2_y = [], []

            for fs in segment_faces:
                # Two largest faces, ordered left to right.
                top_faces = sorted(fs, key=area_of, reverse=True)[:2]
                fs_sorted = sorted(top_faces, key=lambda face: face['nx'])

                if is_dual_track and len(fs_sorted) >= 2:
                    f_left = fs_sorted[0]
                    f_right = fs_sorted[-1]

                    if abs(f_left['nx'] - f_right['nx']) < 0.20:
                        # Faces too close to split: track the largest only.
                        f_main = max(fs, key=area_of)
                        cand_v1_x.append(f_main['nx'])
                        cand_v1_y.append(f_main['ny'])
                    else:
                        # Left face -> V2 (top pane), right face -> V1 (bottom pane).
                        cand_v2_x.append(f_left['nx'])
                        cand_v2_y.append(f_left['ny'])
                        cand_v1_x.append(f_right['nx'])
                        cand_v1_y.append(f_right['ny'])
                elif fs_sorted:
                    # Single-track frame: follow the largest face on V1.
                    f1 = max(fs_sorted, key=area_of)
                    cand_v1_x.append(f1['nx'])
                    cand_v1_y.append(f1['ny'])

            # Without any usable V2 candidate the segment reverts to single track.
            if is_dual_track and not cand_v2_x:
                is_dual_track = False

            if cand_v1_x:
                center_v1 = (average(cand_v1_x), average(cand_v1_y))
            if is_dual_track:
                # cand_v2_x is guaranteed non-empty here (see the check above).
                center_v2 = (average(cand_v2_x), average(cand_v2_y))

            cuts_v1.append({"start": start_f, "end": end_f, "center": center_v1})
            if is_dual_track:
                cuts_v2.append({"start": start_f, "end": end_f, "center": center_v2})
                last_center_v2 = center_v2
            is_last_dual = is_dual_track

            last_center_v1 = center_v1
            current_frame = end_f

        # Final gap: only V1 is extended, so the tail is always full screen.
        if current_frame < duration_frames:
            cuts_v1.append({"start": current_frame, "end": duration_frames, "center": last_center_v1})
    else:
        cuts_v1.append({"start": 0, "end": duration_frames, "center": (0.5, 0.5)})

    print(f"Generated {len(cuts_v1)} V1 cuts and {len(cuts_v2)} V2 cuts.")

    # --- SHARED XML FRAGMENTS ---
    rate_xml = f"<rate><timebase>{timebase}</timebase><ntsc>FALSE</ntsc></rate>"
    video_name_xml = xml_text(os.path.basename(video_path))
    video_fid_xml = xml_attr(video_file_id)
    video_path_xml = xml_text(video_path)
    file_audio_xml = "<audio><samplecharacteristics><depth>16</depth><samplerate>48000</samplerate></samplecharacteristics><channelcount>2</channelcount></audio>"
    # The source <file> element is identical for every clip item: build it once.
    video_file_block = f"""<file id="{video_fid_xml}"><name>{video_name_xml}</name><pathurl>{video_path_xml}</pathurl>{rate_xml}<duration>{duration_frames}</duration><media><video><samplecharacteristics><width>{int(source_width)}</width><height>{int(source_height)}</height><alpha>straight</alpha></samplecharacteristics></video>{file_audio_xml}</media></file>"""

    # --- GENERATE XML TRACKS ---
    # A V1 cut is laid out in the bottom pane when a V2 cut starts on the same
    # frame (this includes the gap fillers appended to V2 above).
    dual_starts = set(c['start'] for c in cuts_v2)

    src_w = source_width
    src_h = 1080.0 if source_height < 100 else source_height  # safety default
    # Always scale the clip to fill the full sequence height.
    base_scale = (float(height) / src_h) * 100.0

    def make_video_track(cuts_list, track_type="main"):
        items = []
        for cut in cuts_list:
            seg_start, seg_end = cut['start'], cut['end']
            nx, ny = cut['center']  # normalized source coords (0..1)

            if seg_end - seg_start <= 0:
                continue

            is_dual = (seg_start in dual_starts)

            # --- SCALE ---
            final_scale = base_scale
            # Split-screen clips are zoomed in 20% to frame faces tighter.
            if track_type == "secondary" or is_dual:
                final_scale *= 1.2
            if final_scale < 10.0:
                final_scale = 100.0
            s_val = final_scale / 100.0

            # --- POSITION (shift-based, anchor = clip centre) ---
            # Offset of the face from the clip centre, in sequence pixels.
            off_x_seq = (nx - 0.5) * src_w * s_val
            off_y_seq = (ny - 0.5) * src_h * s_val

            # Where the face should land on screen, and the pane to crop to.
            target_screen_x = 0.5 * width
            target_screen_y = 0.5 * height
            pane_top_y = 0.0
            pane_bottom_y = float(height)
            should_crop = False
            if track_type == "secondary":
                target_screen_y = 0.25 * height  # centre of the top pane
                pane_bottom_y = height / 2.0
                should_crop = True
            elif track_type == "main" and is_dual:
                target_screen_y = 0.75 * height  # centre of the bottom pane
                pane_top_y = height / 2.0
                should_crop = True

            # Moving the face onto the target means shifting the clip centre
            # by the negated face offset.
            req_center_x = target_screen_x - off_x_seq
            req_center_y = target_screen_y - off_y_seq

            # The XML centre is expressed relative to the sequence centre, so
            # absolute 0..1 maps to -0.5..0.5.
            pos_h = (req_center_x / float(width)) - 0.5
            pos_v = (req_center_y / float(height)) - 0.5

            seg_id = f"clipitem-video-{get_uid()}"

            # No explicit anchor point is written: the default (clip centre)
            # is what the position maths above assumes.
            basic_motion = f"""<filter><effect><name>Basic Motion</name><effectid>basic</effectid><effectcategory>motion</effectcategory><effecttype>motion</effecttype><mediatype>video</mediatype><parameter authoringApp="PremierePro"><parameterid>scale</parameterid><name>Scale</name><value>{final_scale:.2f}</value></parameter><parameter authoringApp="PremierePro"><parameterid>center</parameterid><name>Center</name><value><horiz>{pos_h:.5f}</horiz><vert>{pos_v:.5f}</vert></value></parameter></effect></filter>"""

            # --- CROP (pane masking) ---
            crop_xml = ""
            if should_crop:
                # Vertical extent of the scaled clip on screen.
                clip_screen_h = src_h * s_val
                clip_top_screen_y = req_center_y - (clip_screen_h / 2.0)
                clip_bottom_screen_y = req_center_y + (clip_screen_h / 2.0)

                # Screen pixels sticking out above / below the pane.
                crop_top_px = max(0.0, pane_top_y - clip_top_screen_y)
                crop_bottom_px = max(0.0, clip_bottom_screen_y - pane_bottom_y)

                # Screen px -> source px -> percentage of source height.
                pct_top = (crop_top_px / s_val) / src_h * 100.0
                pct_bottom = (crop_bottom_px / s_val) / src_h * 100.0
                pct_top = max(0.0, min(100.0, pct_top))
                pct_bottom = max(0.0, min(100.0, pct_bottom))

                crop_parameters = (
                    f"""<parameter authoringApp="PremierePro"><parameterid>top</parameterid><name>Top</name><value>{pct_top:.2f}</value></parameter>"""
                    f"""<parameter authoringApp="PremierePro"><parameterid>bottom</parameterid><name>Bottom</name><value>{pct_bottom:.2f}</value></parameter>"""
                )
                crop_xml = f"""<filter><effect><name>Crop</name><effectid>crop</effectid><effectcategory>transform</effectcategory><effecttype>video</effecttype><mediatype>video</mediatype>{crop_parameters}</effect></filter>"""

            items.append(f"""<clipitem id="{seg_id}"><name>{video_name_xml}</name><duration>{duration_frames}</duration>{rate_xml}<start>{seg_start}</start><end>{seg_end}</end><in>{seg_start}</in><out>{seg_end}</out>{video_file_block}{basic_motion}{crop_xml}</clipitem>""")
        return f"<track>{''.join(items)}</track>"

    track_v1 = make_video_track(cuts_v1, "main")
    track_v2 = make_video_track(cuts_v2, "secondary")

    # --- OVERLAY TRACK ---
    if overlay_segments:
        overlay_clips = []
        for seg in overlay_segments:
            start_f = int(seg['start'] * fps_float)
            end_f = int(seg['end'] * fps_float)
            clip_dur = end_f - start_f
            if clip_dur <= 0:
                continue
            ov_index_xml = xml_attr(seg['index'])
            ov_fid = f"file-ov-{ov_index_xml}-{get_uid()}"
            ov_cid = f"clip-ov-{ov_index_xml}-{get_uid()}"
            ov_name_xml = xml_text(os.path.basename(seg['path']))
            ov_path_xml = xml_text(seg['path'])
            file_blk = f"""<file id="{ov_fid}"><name>{ov_name_xml}</name><pathurl>{ov_path_xml}</pathurl>{rate_xml}<duration>{clip_dur}</duration><media><video><samplecharacteristics><width>{width}</width><height>{height}</height><alpha>straight</alpha></samplecharacteristics></video></media></file>"""
            overlay_clips.append(f"""<clipitem id="{ov_cid}"><name>{ov_name_xml}</name><duration>{clip_dur}</duration>{rate_xml}<start>{start_f}</start><end>{end_f}</end><in>0</in><out>{clip_dur}</out>{file_blk}<compositemode>normal</compositemode></clipitem>""")
        track_overlay_block = f"<track>{''.join(overlay_clips)}</track>"
    else:
        track_overlay_block = "<track></track>"

    # --- ASSEMBLE ---
    timecode_block = f"""<timecode>{rate_xml}<string>00:00:00:00</string><frame>0</frame><displayformat>NDF</displayformat></timecode>"""
    audio_cid_xml = xml_attr(audio_file_id)
    audio_blk = f"""<track><clipitem id="{audio_cid_xml}"><name>{video_name_xml}</name><duration>{duration_frames}</duration>{rate_xml}<start>0</start><end>{duration_frames}</end>{video_file_block}<sourcetrack><mediatype>audio</mediatype><trackindex>1</trackindex></sourcetrack></clipitem></track>"""

    project_name_xml = xml_text(project_name)
    return f"""<?xml version="1.0" encoding="UTF-8"?><xmeml version="4"><sequence id="{sequence_uuid}"><name>{project_name_xml}_CutRef</name><duration>{duration_frames}</duration>{rate_xml}{timecode_block}<media><video><format><samplecharacteristics>{rate_xml}<width>{width}</width><height>{height}</height><pixelaspectratio>square</pixelaspectratio></samplecharacteristics></format>{track_v1}{track_v2}{track_overlay_block}</video><audio>{audio_blk}</audio></media></sequence></xmeml>"""