Spaces:

habulaj
/

subapi

Running

App Files Files Community

habulaj committed 20 days ago

Commit

7e9cc06

verified ·

1 Parent(s): 850834b

Update srt_utils.py

Browse files

Files changed (1) hide show

srt_utils.py +35 -15

srt_utils.py CHANGED Viewed

@@ -163,27 +163,47 @@ def apply_netflix_style_filter(srt_content):
         current_duration = last_word['end'] - current_group[0]['start']
         new_duration_proj = word['end'] - current_group[0]['start']
-        is_too_long_char = len(new_text_proj) > MAX_TOTAL_CHARS
-        is_too_long_dur = new_duration_proj > MAX_DURATION
-        if is_too_long_char or is_too_long_dur:
             grouped_events.append(current_group)
             current_group = [word]
             continue
-        # 3. Check Sentence Endings
         if re.search(r'[.!?]$', last_word['text']):
-            # It's a sentence end.
-            # Only merge if the combined total is fitting well (e.g. single line)
-            # Netflix prefers sentence breaks.
-            # If new_text_proj fits in ONE line, maybe merge? (e.g. "Yes. I do.")
-            # If it forces TWO lines, prefer split.
-            if len(new_text_proj) > MAX_CHARS_PER_LINE:
-                grouped_events.append(current_group)
-                current_group = [word]
-                continue
-        # 4. Line split lookahead (Advanced - skipped for now, relied on format_text_lines)
         current_group.append(word)
     if current_group:

         current_duration = last_word['end'] - current_group[0]['start']
         new_duration_proj = word['end'] - current_group[0]['start']
+        # New Logic: Prefer single lines
+        # If adding the word exceeds 42 chars (MAX_CHARS_PER_LINE)
+        if len(new_text_proj) > MAX_CHARS_PER_LINE:
+            # We are crossing the single line boundary.
+            # Check if we SHOULD split now or allow 2 lines.
+            # Reasons to split (make a new subtitle):
+            # A. Current subtitle is already "long enough" in duration (> 1s)
+            is_long_enough_dur = current_duration > 1.0
+            # B. Current subtitle is a complete sentence?
+            # (Sentence ends are handled by step 3 below; this branch is only a size check.)
+            # C. The projected text is HUGE (e.g. > 70 chars).
+            # Netflix allows up to 84 (2 lines), but user wants "separation".
+            # Let's cap at something smaller for 2 lines, e.g. 70.
+            is_too_huge = len(new_text_proj) > 70
+            # If it's long enough duration OR becoming huge -> BREAK
+            if is_long_enough_dur or is_too_huge:
+                grouped_events.append(current_group)
+                current_group = [word]
+                continue
+            # Otherwise, allow merging into 2nd line (e.g. fast speech, short duration)
+        # Check the absolute upper limits (MAX_TOTAL_CHARS, MAX_DURATION) just in case
+        if len(new_text_proj) > MAX_TOTAL_CHARS or new_duration_proj > MAX_DURATION:
             grouped_events.append(current_group)
             current_group = [word]
             continue
+        # 3. Check Sentence Endings (CRITICAL)
+        # If the previous word ended a sentence, ALWAYS split, unless the current group is tiny (<= 3 chars)
         if re.search(r'[.!?]$', last_word['text']):
+             # Exception: "No." (Very short). "again." (6 chars) will break.
+             if len(current_text) > 3:
+                 grouped_events.append(current_group)
+                 current_group = [word]
+                 continue
         current_group.append(word)
     if current_group: