Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,20 +60,33 @@ def parse_transcript_full(txt: str) -> List[Segment]:
|
|
| 60 |
lines = [l.strip() for l in txt.splitlines() if l.strip()]
|
| 61 |
results: List[Segment] = []
|
| 62 |
|
| 63 |
-
|
|
|
|
| 64 |
|
| 65 |
for l in lines:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
m = pat_range.match(l)
|
| 67 |
if m:
|
| 68 |
s, e, text = m.groups()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
try:
|
| 70 |
s_f = parse_timecode_to_frames(s)
|
| 71 |
e_f = parse_timecode_to_frames(e)
|
| 72 |
if e_f > s_f:
|
| 73 |
results.append(Segment(s, e, s_f, e_f, text, 0.0))
|
| 74 |
-
except Exception:
|
|
|
|
| 75 |
continue
|
| 76 |
|
|
|
|
| 77 |
return results
|
| 78 |
|
| 79 |
# ============ MANUAL TIMECODES ============
|
|
|
|
| 60 |
lines = [l.strip() for l in txt.splitlines() if l.strip()]
|
| 61 |
results: List[Segment] = []
|
| 62 |
|
| 63 |
+
# Accepts several formats: with or without square brackets; hyphen (-), em dash (—) or en dash (–) between the two timecodes
|
| 64 |
+
pat_range = re.compile(r"^\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—–]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s*(.*)$")
|
| 65 |
|
| 66 |
for l in lines:
|
| 67 |
+
# Pula linhas com apenas "Desconhecido"
|
| 68 |
+
if l.strip() == "Desconhecido":
|
| 69 |
+
continue
|
| 70 |
+
|
| 71 |
m = pat_range.match(l)
|
| 72 |
if m:
|
| 73 |
s, e, text = m.groups()
|
| 74 |
+
text = text.strip()
|
| 75 |
+
|
| 76 |
+
# Skip if there is no text, or if the text is just "Desconhecido"
|
| 77 |
+
if not text or text == "Desconhecido":
|
| 78 |
+
continue
|
| 79 |
+
|
| 80 |
try:
|
| 81 |
s_f = parse_timecode_to_frames(s)
|
| 82 |
e_f = parse_timecode_to_frames(e)
|
| 83 |
if e_f > s_f:
|
| 84 |
results.append(Segment(s, e, s_f, e_f, text, 0.0))
|
| 85 |
+
except Exception as ex:
|
| 86 |
+
print(f"Erro ao processar linha: {l[:50]}... -> {ex}")
|
| 87 |
continue
|
| 88 |
|
| 89 |
+
print(f"✓ {len(results)} segmentos encontrados na transcrição")
|
| 90 |
return results
|
| 91 |
|
| 92 |
# ============ MANUAL TIMECODES ============
|