habulaj committed on
Commit
1056927
·
verified ·
1 Parent(s): c2ce5f9

Update srt_utils.py

Browse files
Files changed (1) hide show
  1. srt_utils.py +10 -15
srt_utils.py CHANGED
@@ -334,21 +334,16 @@ def process_audio_for_transcription(input_file: str) -> str:
334
  # 4. volume=1.5 (Boost a bit globally)
335
 
336
  # "Retire qualquer música de fundo" -> Extremely hard without AI like Spleeter.
337
- # But aggressive vocal isolation via EQ helps.
338
- # To avoid hallucinations during silence, we need to MUTE silence, not remove it (which breaks sync).
339
- # We use 'afftdn' (FFT Denoise) to aggressively reduce background noise/music floor.
340
- # We can also use a 'compand' filter as a Noise Gate to silence anything below a threshold.
341
 
342
- # Filter Chain Strategy:
343
- # 1. Highpass (300Hz) - Cut bass/rumble/drums
344
- # 2. Lowpass (3000Hz) - Cut sibilance/high-hats
345
- # 3. AFFTDN - Spectral noise reduction (aggressive)
346
- # 4. Dynaudnorm - Normalize volume of speech
347
 
348
- # Check if we can use afftdn (assuming recent ffmpeg from previous output)
349
- # If not, we might fail, but let's try.
350
-
351
- filters = "highpass=f=300,lowpass=f=3000,afftdn=nr=30:nf=-25,dynaudnorm=f=150:g=15"
352
 
353
  try:
354
  command = [
@@ -364,7 +359,7 @@ def process_audio_for_transcription(input_file: str) -> str:
364
  output_file
365
  ]
366
 
367
- print(f"🔊 Processando áudio com FFmpeg (Noise Gate/Spectral Cleaning): {' '.join(command)}")
368
  subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
369
 
370
  if os.path.exists(output_file):
@@ -373,4 +368,4 @@ def process_audio_for_transcription(input_file: str) -> str:
373
  except Exception as e:
374
  print(f"⚠️ Falha ao processar áudio: {e}")
375
 
376
- return input_file
 
334
  # 4. volume=1.5 (Boost a bit globally)
335
 
336
  # "Retire qualquer música de fundo" -> Extremely hard without AI like Spleeter.
337
+ # To reduce music impact without destroying voice, we use a Gentler Vocal EQ.
338
+ # We essentially attenuate the frequency ranges where music dominates (sub-bass and extreme highs)
339
+ # and normalize volume using EBU R128 (loudnorm) which is more natural than dynaudnorm.
 
340
 
341
+ # Filter Chain Strategy (Simplified):
342
+ # 1. Highpass (200Hz) - Cut rumble/bass lines
343
+ # 2. Lowpass (8000Hz) - Keep up to 8kHz for clarity (s, t, p sounds), cut cymbal shimmer.
344
+ # 3. Loudnorm - Standardize volume without aggressive pumping.
 
345
 
346
+ filters = "highpass=f=200,lowpass=f=8000,loudnorm"
 
 
 
347
 
348
  try:
349
  command = [
 
359
  output_file
360
  ]
361
 
362
+ print(f"🔊 Processando áudio com FFmpeg (Gentle EQ + Loudnorm): {' '.join(command)}")
363
  subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
364
 
365
  if os.path.exists(output_file):
 
368
  except Exception as e:
369
  print(f"⚠️ Falha ao processar áudio: {e}")
370
 
371
+ return input_file