habulaj committed on
Commit
7e9cc06
·
verified ·
1 Parent(s): 850834b

Update srt_utils.py

Browse files
Files changed (1) hide show
  1. srt_utils.py +35 -15
srt_utils.py CHANGED
@@ -163,27 +163,47 @@ def apply_netflix_style_filter(srt_content):
163
  current_duration = last_word['end'] - current_group[0]['start']
164
  new_duration_proj = word['end'] - current_group[0]['start']
165
 
166
- is_too_long_char = len(new_text_proj) > MAX_TOTAL_CHARS
167
- is_too_long_dur = new_duration_proj > MAX_DURATION
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
- if is_too_long_char or is_too_long_dur:
 
170
  grouped_events.append(current_group)
171
  current_group = [word]
172
  continue
173
 
174
- # 3. Check Sentence Endings
 
175
  if re.search(r'[.!?]$', last_word['text']):
176
- # It's a sentence end.
177
- # Only merge if the combined total is fitting well (e.g. single line)
178
- # Netflix prefers sentence breaks.
179
- # If new_text_proj fits in ONE line, maybe merge? (e.g. "Yes. I do.")
180
- # If it forces TWO lines, prefer split.
181
- if len(new_text_proj) > MAX_CHARS_PER_LINE:
182
- grouped_events.append(current_group)
183
- current_group = [word]
184
- continue
185
-
186
- # 4. Line split lookahead (Advanced - skipped for now, relied on format_text_lines)
187
  current_group.append(word)
188
 
189
  if current_group:
 
163
  current_duration = last_word['end'] - current_group[0]['start']
164
  new_duration_proj = word['end'] - current_group[0]['start']
165
 
166
+ # New Logic: Prefer single lines
167
+ # If adding the word exceeds 42 chars (MAX_CHARS_PER_LINE)
168
+ if len(new_text_proj) > MAX_CHARS_PER_LINE:
169
+ # We are crossing the single line boundary.
170
+ # Check if we SHOULD split now or allow 2 lines.
171
+
172
+ # Reasons to split (make a new subtitle):
173
+ # A. Current subtitle is already "long enough" in duration (> 1s)
174
+ is_long_enough_dur = current_duration > 1.0
175
+
176
+ # B. Current subtitle is a complete sentence?
177
+ # (Handled by step 3, but this is size check)
178
+
179
+ # C. The projected text is HUGE (e.g. > 70 chars).
180
+ # Netflix allows up to 84 (2 lines), but user wants "separation".
181
+ # Let's cap at something smaller for 2 lines, e.g. 70.
182
+ is_too_huge = len(new_text_proj) > 70
183
+
184
+ # If it's long enough duration OR becoming huge -> BREAK
185
+ if is_long_enough_dur or is_too_huge:
186
+ grouped_events.append(current_group)
187
+ current_group = [word]
188
+ continue
189
+
190
+ # Otherwise, allow merging into 2nd line (e.g. fast speech, short duration)
191
 
192
+ # Check the absolute hard limits (MAX_TOTAL_CHARS, MAX_DURATION) just in case
193
+ if len(new_text_proj) > MAX_TOTAL_CHARS or new_duration_proj > MAX_DURATION:
194
  grouped_events.append(current_group)
195
  current_group = [word]
196
  continue
197
 
198
+ # 3. Check Sentence Endings (CRITICAL)
199
+ # If previous word was a sentence end, ALWAYS split, unless current group is tiny (at most 3 chars)
200
  if re.search(r'[.!?]$', last_word['text']):
201
+ # Exception: "No." (Very short). "again." (6 chars) will break.
202
+ if len(current_text) > 3:
203
+ grouped_events.append(current_group)
204
+ current_group = [word]
205
+ continue
206
+
 
 
 
 
 
207
  current_group.append(word)
208
 
209
  if current_group: