Spaces:

Marek4321
/

StoryLens

Sleeping

File size: 1,760 Bytes

6bdfadc

from typing import List, Dict


class SegmentSynchronizer:
    """Pair each frame with the transcript text that overlaps its time window."""

    def synchronize(
        self,
        frames: List[Dict],      # [{"timestamp": 0.0, "path": "...", "description": "..."}]
        transcript: List[Dict],  # [{"start": 0.0, "end": 3.2, "text": "..."}]
        default_interval: float = 2.0,
    ) -> List[Dict]:
        """
        Create unified segments with visual + speech.

        Each frame opens a time window that starts at its own timestamp and
        ends at the next frame's timestamp. The last frame has no successor,
        so its window is ``default_interval`` seconds long.

        A transcript entry is attached to a segment when the two intervals
        strictly overlap; an entry that merely touches a window boundary is
        not included. An entry spanning several windows is repeated in each
        of them. Matching texts are joined with single spaces in transcript
        order.

        NOTE(review): windows are derived from consecutive list positions, so
        ``frames`` is presumably expected in ascending timestamp order --
        confirm against the caller.

        Args:
            frames: Frame dicts with "timestamp", "path" and "description" keys.
            transcript: Speech dicts with "start", "end" and "text" keys.
            default_interval: Window length (same unit as the timestamps) used
                for the final frame. Defaults to 2.0.

        Returns:
            List of synchronized segments, one per frame, in input order:
            [
                {
                    "start": 0.0,
                    "end": 2.0,
                    "frame_path": "/tmp/frame_001.jpg",
                    "visual": "Woman looking frustrated in kitchen",
                    "speech": "Tired of everyday exhaustion?"
                },
                ...
            ]
            "speech" is None when no transcript text overlaps the window (or
            the overlapping text is empty/whitespace only).
        """
        segments = []
        last_index = len(frames) - 1

        for i, frame in enumerate(frames):
            timestamp = frame['timestamp']

            # The window closes where the next frame begins; the final frame
            # falls back to the configurable interval.
            if i < last_index:
                end_time = frames[i + 1]['timestamp']
            else:
                end_time = timestamp + default_interval

            # Strict inequalities: zero-length touching at a boundary does not
            # count as overlap.
            overlapping = [
                t['text']
                for t in transcript
                if t['end'] > timestamp and t['start'] < end_time
            ]
            speech_text = " ".join(overlapping).strip()

            segments.append({
                "start": timestamp,
                "end": end_time,
                "frame_path": frame['path'],
                "visual": frame['description'],
                "speech": speech_text or None
            })

        return segments