Spaces:

habulaj
/

subapi

Running

App Files Files Community

habulaj committed on Jan 26

Commit

4541639

verified ·

1 Parent(s): 7e9cc06

Update srt_utils.py

Browse files

Files changed (1) hide show

srt_utils.py +32 -25

srt_utils.py CHANGED Viewed

@@ -80,36 +80,43 @@ def format_text_lines(text, max_chars=42):
 def fix_word_timing(words):
     """
     Lay words out one after another in time without overlaps, keeping text order.

     Each word keeps its own duration (raised to at least 0.01 when shorter).
     A word whose start falls before the end of the preceding word is shifted
     forward to begin exactly where that word ends. The word dicts are updated
     in place and returned in a newly built list.
     """
     if not words:
         return []

     sequenced = []
     cursor = 0.0  # end time of the most recently placed word
     for entry in words:
         # Keep the word's own length, with a small floor as a sanity check.
         span = max(entry['end'] - entry['start'], 0.01)
         # Silence before a word is kept; an overlap is resolved by starting
         # the word at the end of the previous one.
         begin = max(entry['start'], cursor)
         finish = begin + span
         entry['start'] = begin
         entry['end'] = finish
         sequenced.append(entry)
         cursor = finish
     return sequenced
 def apply_netflix_style_filter(srt_content):
     """

 def fix_word_timing(words):
     """
     Make word timings sequential (no overlaps) while preserving text order.

     Strategy:
     1. On overlap, prefer trimming the END of the previous word so that the
        START of the current word is preserved.
     2. Only delay the current word when trimming would leave the previous
        word with (almost) no duration, e.g. when the current word starts at
        or before the start of the previous word.
     3. Repair zero or negative durations on every word, including the first.

     Args:
         words: list of dicts with numeric 'start' and 'end' values. The
             dicts are modified in place.

     Returns:
         The same list object with adjusted timings, or a new empty list when
         `words` is empty (or otherwise falsy).
     """
     if not words:
         return []

     min_trimmed_duration = 0.01  # prev is never trimmed down to this or less
     fallback_duration = 0.1  # 100ms, given to words with end <= start

     # The pairwise pass below only repairs the second word of each pair, so
     # the first word (and a single-word input) must be validated separately.
     first = words[0]
     if first['end'] <= first['start']:
         first['end'] = first['start'] + fallback_duration

     for prev, curr in zip(words, words[1:]):
         if curr['start'] < prev['end']:
             # Overlap: candidate new end for prev, never before its own start.
             trimmed_end = max(prev['start'], curr['start'])
             if trimmed_end <= prev['start'] + min_trimmed_duration:
                 # Trimming would collapse prev, so delay curr instead to
                 # keep the words in text order.
                 curr['start'] = prev['end']
             else:
                 prev['end'] = trimmed_end

         # Delaying curr (or bad input) can leave it empty or inverted.
         if curr['end'] <= curr['start']:
             curr['end'] = curr['start'] + fallback_duration
     return words
 def apply_netflix_style_filter(srt_content):
     """