habulaj committed on
Commit
4541639
·
verified ·
1 Parent(s): 7e9cc06

Update srt_utils.py

Browse files
Files changed (1) hide show
  1. srt_utils.py +32 -25
srt_utils.py CHANGED
@@ -80,36 +80,43 @@ def format_text_lines(text, max_chars=42):
80
 
81
  def fix_word_timing(words):
82
  """
83
- Ensures words are sequential in time (no overlaps) and preserves text order.
 
 
 
 
84
  """
85
  if not words: return []
86
 
87
- fixed_words = []
88
- last_end = 0.0
89
 
90
- for word in words:
91
- start = word['start']
92
- end = word['end']
93
- duration = end - start
94
- if duration < 0.01: duration = 0.01 # Minimal sanity check
95
-
96
- # 1. Start must be >= last_end (Sequential constraint)
97
- # However, if 'start' is significantly later (silence), keep 'start'.
98
- # If 'start' is before 'last_end' (overlap), push 'start' to 'last_end'.
99
-
100
- if start < last_end:
101
- start = last_end
102
-
103
- # 2. Recalculate end
104
- end = start + duration
105
-
106
- word['start'] = start
107
- word['end'] = end
108
-
109
- fixed_words.append(word)
110
- last_end = end
111
 
112
- return fixed_words
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  def apply_netflix_style_filter(srt_content):
115
  """
 
80
 
81
  def fix_word_timing(words):
82
  """
83
+ Ensures words are sequential in time.
84
+ Strategy:
85
+ 1. If overlaps, prefer trimming the END of the previous word to preserve the START of the current word.
86
+ 2. Only delay the current word if the previous word would become too short or inverted.
87
+ 3. Ensure minimum duration for all words.
88
  """
89
  if not words: return []
90
 
91
+ # We edit in place / return modified list
 
92
 
93
+ for i in range(1, len(words)):
94
+ prev = words[i-1]
95
+ curr = words[i]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
+ # Check for overlap
98
+ if curr['start'] < prev['end']:
99
+ # Overlap detected.
100
+ # Try to trim prev['end'] to match curr['start']
101
+
102
+ # Check if trimming leaves prev with enough time? (e.g. > 0s)
103
+ # Actually, standard logic: just clamp prev end.
104
+ new_prev_end = max(prev['start'], curr['start'])
105
+
106
+ # If trimming makes it zero/negative (meaning curr starts BEFORE prev starts),
107
+ # then we adhere to sequential text order implies we MUST delay curr.
108
+ if new_prev_end <= prev['start'] + 0.01:
109
+ # Impossible to trim prev enough. Push curr.
110
+ curr['start'] = prev['end']
111
+ else:
112
+ # Trim prev
113
+ prev['end'] = new_prev_end
114
+
115
+ # Ensure curr has valid duration
116
+ if curr['end'] <= curr['start']:
117
+ curr['end'] = curr['start'] + 0.1 # Minimum duration 100ms
118
+
119
+ return words
120
 
121
  def apply_netflix_style_filter(srt_content):
122
  """