Update srt_utils.py
Browse files
- srt_utils.py +10 -15
srt_utils.py
CHANGED
|
@@ -334,21 +334,16 @@ def process_audio_for_transcription(input_file: str) -> str:
|
|
| 334 |
# 4. volume=1.5 (Boost a bit globally)
|
| 335 |
|
| 336 |
# "Retire qualquer música de fundo" -> Extremely hard without AI like Spleeter.
|
| 337 |
-
#
|
| 338 |
-
#
|
| 339 |
-
#
|
| 340 |
-
# We can also use a 'compand' filter as a Noise Gate to silence anything below a threshold.
|
| 341 |
|
| 342 |
-
# Filter Chain Strategy:
|
| 343 |
-
# 1. Highpass (300Hz)
|
| 344 |
-
# 2. Lowpass (3000Hz)
|
| 345 |
-
# 3. Afftdn (nr=30, nf=-25) - FFT-based noise reduction
|
| 346 |
-
# 4. Dynaudnorm - Normalize volume of speech
|
| 347 |
|
| 348 |
-
|
| 349 |
-
# If not, we might fail, but let's try.
|
| 350 |
-
|
| 351 |
-
filters = "highpass=f=300,lowpass=f=3000,afftdn=nr=30:nf=-25,dynaudnorm=f=150:g=15"
|
| 352 |
|
| 353 |
try:
|
| 354 |
command = [
|
|
@@ -364,7 +359,7 @@ def process_audio_for_transcription(input_file: str) -> str:
|
|
| 364 |
output_file
|
| 365 |
]
|
| 366 |
|
| 367 |
-
print(f"🔊 Processando áudio com FFmpeg (
|
| 368 |
subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 369 |
|
| 370 |
if os.path.exists(output_file):
|
|
@@ -373,4 +368,4 @@ def process_audio_for_transcription(input_file: str) -> str:
|
|
| 373 |
except Exception as e:
|
| 374 |
print(f"⚠️ Falha ao processar áudio: {e}")
|
| 375 |
|
| 376 |
-
return input_file
|
|
|
|
| 334 |
# 4. volume=1.5 (Boost a bit globally)
|
| 335 |
|
| 336 |
# "Retire qualquer música de fundo" -> Extremely hard without AI like Spleeter.
|
| 337 |
+
# To reduce music impact without destroying voice, we use a Gentler Vocal EQ.
|
| 338 |
+
# We essentially attenuate the frequency ranges where music dominates (sub-bass and the very high end)
|
| 339 |
+
# and normalize volume using EBU R128 (loudnorm) which is more natural than dynaudnorm.
|
|
|
|
| 340 |
|
| 341 |
+
# Filter Chain Strategy (Simplified):
|
| 342 |
+
# 1. Highpass (200Hz) - Cut rumble/bass lines
|
| 343 |
+
# 2. Lowpass (8000Hz) - Keep up to 8kHz for clarity (s, t, p sounds), cut cymbal shimmer.
|
| 344 |
+
# 3. Loudnorm - Standardize volume without aggressive pumping.
|
|
|
|
| 345 |
|
| 346 |
+
filters = "highpass=f=200,lowpass=f=8000,loudnorm"
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
try:
|
| 349 |
command = [
|
|
|
|
| 359 |
output_file
|
| 360 |
]
|
| 361 |
|
| 362 |
+
print(f"🔊 Processando áudio com FFmpeg (Gentle EQ + Loudnorm): {' '.join(command)}")
|
| 363 |
subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 364 |
|
| 365 |
if os.path.exists(output_file):
|
|
|
|
| 368 |
except Exception as e:
|
| 369 |
print(f"⚠️ Falha ao processar áudio: {e}")
|
| 370 |
|
| 371 |
+
return input_file
|