bigbossmonster committed on
Commit
a6d270c
·
verified ·
1 Parent(s): 7ef6011

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -13
app.py CHANGED
@@ -49,21 +49,47 @@ def parse_filename_to_ms(filename):
49
  return (h * 3600000) + (m * 60000) + (s * 1000) + ms
50
 
51
  def parse_srt(content: str):
52
- blocks = content.replace('\r\n', '\n').replace('\r', '\n').strip().split('\n\n')
 
 
 
 
 
53
  parsed = []
54
  for block in blocks:
55
- lines = block.split('\n')
56
- if len(lines) >= 2:
57
- time_line = lines[1]
58
- if '-->' in time_line:
59
- start_str = time_line.split('-->')[0].strip()
60
- text = " ".join(lines[2:]) if len(lines) > 2 else "[BLANK]"
61
- parsed.append({
62
- "id": lines[0],
63
- "time": time_line,
64
- "startTimeMs": parse_srt_time_to_ms(start_str),
65
- "text": text
66
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  return parsed
68
 
69
  def compress_image(image_bytes, quality=70, max_width=800):
 
49
  return (h * 3600000) + (m * 60000) + (s * 1000) + ms
50
 
51
  def parse_srt(content: str):
52
+ # Normalize line endings
53
+ content = content.replace('\r\n', '\n').replace('\r', '\n')
54
+
55
+ # Split by blocks (2 or more newlines) - Robust against triple newlines
56
+ blocks = re.split(r'\n\s*\n', content.strip())
57
+
58
  parsed = []
59
  for block in blocks:
60
+ # Clean lines
61
+ lines = [l.strip() for l in block.split('\n') if l.strip()]
62
+ if len(lines) < 2:
63
+ continue
64
+
65
+ # Robustly find timestamp line
66
+ time_line_index = -1
67
+ for idx, line in enumerate(lines):
68
+ if '-->' in line:
69
+ time_line_index = idx
70
+ break
71
+
72
+ if time_line_index != -1:
73
+ time_line = lines[time_line_index]
74
+
75
+ # Determine ID (usually line before time)
76
+ if time_line_index > 0:
77
+ s_id = lines[time_line_index - 1]
78
+ else:
79
+ s_id = str(len(parsed) + 1)
80
+
81
+ # Extract Text (everything after time)
82
+ text_lines = lines[time_line_index + 1:]
83
+ text = " ".join(text_lines) if text_lines else "[BLANK]"
84
+
85
+ start_str = time_line.split('-->')[0].strip()
86
+
87
+ parsed.append({
88
+ "id": s_id,
89
+ "time": time_line,
90
+ "startTimeMs": parse_srt_time_to_ms(start_str),
91
+ "text": text
92
+ })
93
  return parsed
94
 
95
  def compress_image(image_bytes, quality=70, max_width=800):