leicam committed on
Commit
fb81b2f
·
verified ·
1 Parent(s): 21b6fcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -2
app.py CHANGED
@@ -60,20 +60,33 @@ def parse_transcript_full(txt: str) -> List[Segment]:
60
  lines = [l.strip() for l in txt.splitlines() if l.strip()]
61
  results: List[Segment] = []
62
 
63
- pat_range = re.compile(r"^\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s+(.*)$")
 
64
 
65
  for l in lines:
 
 
 
 
66
  m = pat_range.match(l)
67
  if m:
68
  s, e, text = m.groups()
 
 
 
 
 
 
69
  try:
70
  s_f = parse_timecode_to_frames(s)
71
  e_f = parse_timecode_to_frames(e)
72
  if e_f > s_f:
73
  results.append(Segment(s, e, s_f, e_f, text, 0.0))
74
- except Exception:
 
75
  continue
76
 
 
77
  return results
78
 
79
  # ============ MANUAL TIMECODES ============
 
60
  lines = [l.strip() for l in txt.splitlines() if l.strip()]
61
  results: List[Segment] = []
62
 
63
+ # Aceita vários formatos: com ou sem colchetes, - ou —
64
+ pat_range = re.compile(r"^\[?\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-—–]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*\]?\s*(.*)$")
65
 
66
  for l in lines:
67
+ # Pula linhas com apenas "Desconhecido"
68
+ if l.strip() == "Desconhecido":
69
+ continue
70
+
71
  m = pat_range.match(l)
72
  if m:
73
  s, e, text = m.groups()
74
+ text = text.strip()
75
+
76
+ # Pula se não tiver texto
77
+ if not text or text == "Desconhecido":
78
+ continue
79
+
80
  try:
81
  s_f = parse_timecode_to_frames(s)
82
  e_f = parse_timecode_to_frames(e)
83
  if e_f > s_f:
84
  results.append(Segment(s, e, s_f, e_f, text, 0.0))
85
+ except Exception as ex:
86
+ print(f"Erro ao processar linha: {l[:50]}... -> {ex}")
87
  continue
88
 
89
+ print(f"✓ {len(results)} segmentos encontrados na transcrição")
90
  return results
91
 
92
  # ============ MANUAL TIMECODES ============