lucamartinelli committed on
Commit
ef51d6b
·
1 Parent(s): 6672a34

Validazione

Browse files
Files changed (1) hide show
  1. src/vtt_utils.py +43 -0
src/vtt_utils.py CHANGED
@@ -95,6 +95,49 @@ def validate_vtt(vtt_content: str) -> Tuple[str, str]:
95
  "warning",
96
  )
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  return "🟢 Valid", "success"
99
  except Exception as e:
100
  return f"🔴 Validation error: {str(e)}", "error"
 
95
  "warning",
96
  )
97
 
98
+ # Check for punctuation followed by lowercase
99
+ last_char = None
100
+ last_line_num = None
101
+
102
+ for i, line in enumerate(lines):
103
+ if "-->" not in line:
104
+ continue
105
+
106
+ # Get text lines for this cue
107
+ j = i + 1
108
+ while j < len(lines):
109
+ content_line = lines[j]
110
+ if "-->" in content_line:
111
+ break
112
+ if content_line.strip() == "":
113
+ break
114
+
115
+ # Process text line
116
+ # Remove speaker tag for validation
117
+ clean_text = re.sub(r"<v\s+[^>]+>", "", content_line).strip()
118
+
119
+ if clean_text:
120
+ # Check internal to the line
121
+ match = re.search(r"([.!?])\s+([a-z])", clean_text)
122
+ if match:
123
+ return (
124
+ f"🟡 Warning: Punctuation followed by lowercase at line {j + 1}",
125
+ "warning",
126
+ )
127
+
128
+ # Check across boundary
129
+ if last_char and last_char in ".!?":
130
+ if clean_text[0].islower():
131
+ return (
132
+ f"🟡 Warning: Punctuation followed by lowercase across lines {last_line_num} and {j + 1}",
133
+ "warning",
134
+ )
135
+
136
+ last_char = clean_text[-1]
137
+ last_line_num = j + 1
138
+
139
+ j += 1
140
+
141
  return "🟢 Valid", "success"
142
  except Exception as e:
143
  return f"🔴 Validation error: {str(e)}", "error"