Spaces:

hidenoriyamano37
/

srtLineBreakTool

Runtime error

App Files Community

hiDenorIYamano committed on Sep 18, 2023

Commit

cd646ca

1 Parent(s): 4b01a04

ホワイトリスト追加、最大行数追加

Browse files

Files changed (1) hide show

app.py +29 -8

app.py CHANGED Viewed

@@ -8,6 +8,9 @@ def split_japanese_line(line, max_length):
     if len(line) < max_length:
         return line
     m = MeCab.Tagger()
     nodes = m.parse(line).split("\n")
@@ -18,13 +21,24 @@ def split_japanese_line(line, max_length):
     lines = []
     current_line = ""
-    for idx, word in enumerate(words):
         # If adding the next word exceeds the max_length or if the word is a particle, break the line
-        if len(word_features) > idx - 1:
-            if word_features[idx - 1] == "助詞-格助詞":
                 lines.append(current_line)
                 current_line = ""
-        current_line += word
     # Append the last line if it exists
     if current_line:
@@ -32,13 +46,20 @@ def split_japanese_line(line, max_length):
     # Merge lines to ensure each line is less than or equal to max_length
     merged_lines = []
-    temp_line = ""
     for line in lines:
-        if len(temp_line + line) <= max_length:
             temp_line += line
         else:
-            merged_lines.append(temp_line)
-            temp_line = line
     if temp_line:
         merged_lines.append(temp_line)

     if len(line) < max_length:
         return line
+    max_line = 2
+    line = re.sub(r'[、。]', '', line)
     m = MeCab.Tagger()
     nodes = m.parse(line).split("\n")
     lines = []
     current_line = ""
+    idx = 0
+    while idx < len(words):
+        word = words[idx]
         # If adding the next word exceeds the max_length or if the word is a particle, break the line
+        if idx + 1 < len(words) and any(
+                whitelisted_word == word + words[idx + 1] for whitelisted_word in ["という", "っていう", "になって"]):
+            current_line += word + words[idx + 1]
+            idx += 2  # Increment to skip next word
+        elif len(word_features) > idx - 1:
+            if word_features[idx - 1] in ["助詞-格助詞", "助詞-副助詞", "助詞-終助詞"]:
                 lines.append(current_line)
                 current_line = ""
+            current_line += word
+            idx += 1
+        else:
+            current_line += word
+            idx += 1
     # Append the last line if it exists
     if current_line:
     # Merge lines to ensure each line is less than or equal to max_length
     merged_lines = []
+    temp_line = ''
     for line in lines:
+        if len(temp_line) <= 12:
+            temp_line += line
+        elif len(temp_line + line) <= max_length:
+            temp_line += line
+        elif len(merged_lines) >= max_line - 1:
             temp_line += line
         else:
+            if temp_line == '':
+                merged_lines.append(line)
+            else:
+                merged_lines.append(temp_line)
+                temp_line = line
     if temp_line:
         merged_lines.append(temp_line)