habulaj committed on
Commit
d776c87
·
verified ·
1 Parent(s): fc9986c

Update srt_utils.py

Browse files
Files changed (1) hide show
  1. srt_utils.py +40 -3
srt_utils.py CHANGED
@@ -18,6 +18,28 @@ def seconds_to_srt_time(seconds):
18
  ms = int((seconds % 1) * 1000)
19
  return f"{hours:02d}:{minutes:02d}:{secs:02d},{ms:03d}"
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def parse_srt(srt_content):
22
  """Parses SRT content into a list of dictionaries. Returns VALIDATED list."""
23
  pattern = re.compile(r"(\d+)\s*\n([^-\n]+?) --> ([^-\n]+?)\s*\n((?:(?!\d+\s*\n\d{1,2}:\d{2}).+\n?)*)", re.MULTILINE)
@@ -298,7 +320,7 @@ import subprocess
298
  import shutil
299
  import os
300
 
301
- def process_audio_for_transcription(input_file: str, has_bg_music: bool = False) -> str:
302
  """
303
  Process audio to maximize speech clarity.
304
 
@@ -316,7 +338,13 @@ def process_audio_for_transcription(input_file: str, has_bg_music: bool = False)
316
 
317
  input_filename = os.path.basename(input_file)
318
  input_stem = os.path.splitext(input_filename)[0]
319
- final_output = os.path.join(output_dir, f"{input_stem}.processed.mp3")
 
 
 
 
 
 
320
 
321
  ffmpeg_cmd = shutil.which("ffmpeg")
322
  if not ffmpeg_cmd:
@@ -384,11 +412,20 @@ def process_audio_for_transcription(input_file: str, has_bg_music: bool = False)
384
  cmd_convert = [
385
  ffmpeg_cmd, "-y",
386
  "-i", vocals_path,
 
 
 
 
 
 
 
 
 
387
  "-ac", "1", "-ar", "16000",
388
  "-af", filter_chain,
389
  "-c:a", "libmp3lame", "-q:a", "2",
390
  final_output
391
- ]
392
 
393
  try:
394
  subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
18
  ms = int((seconds % 1) * 1000)
19
  return f"{hours:02d}:{minutes:02d}:{secs:02d},{ms:03d}"
20
 
21
def shift_srt_timestamps(srt_content, offset_seconds):
    """Return *srt_content* with every cue moved by *offset_seconds*.

    The text is parsed with ``parse_srt``. When that yields no cues the
    input is handed back untouched. Otherwise the cues are renumbered from 1
    in parsed order, each start/end has the offset added, and the result is
    rendered again through ``seconds_to_srt_time``.

    Args:
        srt_content: SRT text whose timestamps should be moved.
        offset_seconds: Amount added to every start and end time. May be
            negative to move cues earlier.

    Returns:
        The rebuilt SRT text with leading/trailing whitespace stripped, or
        the original ``srt_content`` when nothing could be parsed.
    """
    cues = parse_srt(srt_content)
    if not cues:
        return srt_content

    blocks = []
    for index, cue in enumerate(cues, 1):
        # A negative offset must never produce a negative timestamp, so the
        # start is floored at 0. The end is floored slightly above zero so a
        # fully clamped cue does not end exactly where it starts.
        shifted_start = max(cue['start'] + offset_seconds, 0)
        shifted_end = max(cue['end'] + offset_seconds, 1e-3)

        start_stamp = seconds_to_srt_time(shifted_start)
        end_stamp = seconds_to_srt_time(shifted_end)

        blocks.append(f"{index}\n{start_stamp} --> {end_stamp}\n{cue['text']}\n\n")

    # Each block ends with a blank line; strip the trailing one from the
    # final document.
    return "".join(blocks).strip()
42
+
43
  def parse_srt(srt_content):
44
  """Parses SRT content into a list of dictionaries. Returns VALIDATED list."""
45
  pattern = re.compile(r"(\d+)\s*\n([^-\n]+?) --> ([^-\n]+?)\s*\n((?:(?!\d+\s*\n\d{1,2}:\d{2}).+\n?)*)", re.MULTILINE)
 
320
  import shutil
321
  import os
322
 
323
+ def process_audio_for_transcription(input_file: str, has_bg_music: bool = False, time_start: float = None, time_end: float = None) -> str:
324
  """
325
  Process audio to maximize speech clarity.
326
 
 
338
 
339
  input_filename = os.path.basename(input_file)
340
  input_stem = os.path.splitext(input_filename)[0]
341
+
342
+ # Adicionar sufixo se houver corte, para evitar cache/conflito incorreto
343
+ suffix = ""
344
+ if time_start is not None: suffix += f"_s{int(time_start)}"
345
+ if time_end is not None: suffix += f"_e{int(time_end)}"
346
+
347
+ final_output = os.path.join(output_dir, f"{input_stem}{suffix}.processed.mp3")
348
 
349
  ffmpeg_cmd = shutil.which("ffmpeg")
350
  if not ffmpeg_cmd:
 
412
  cmd_convert = [
413
  ffmpeg_cmd, "-y",
414
  "-i", vocals_path,
415
+ ]
416
+
417
+ # Apply cutting if requested (Output seeking for accuracy)
418
+ if time_start is not None:
419
+ cmd_convert.extend(["-ss", str(time_start)])
420
+ if time_end is not None:
421
+ cmd_convert.extend(["-to", str(time_end)])
422
+
423
+ cmd_convert.extend([
424
  "-ac", "1", "-ar", "16000",
425
  "-af", filter_chain,
426
  "-c:a", "libmp3lame", "-q:a", "2",
427
  final_output
428
+ ])
429
 
430
  try:
431
  subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)