Spaces:

habulaj
/

subapi

Running

App Files Community

habulaj committed on Jan 27

Commit

1056927

verified ·

1 Parent(s): c2ce5f9

Update srt_utils.py

Browse files

Files changed (1) hide show

srt_utils.py +10 -15

srt_utils.py CHANGED Viewed

@@ -334,21 +334,16 @@ def process_audio_for_transcription(input_file: str) -> str:
     # 4. volume=1.5 (Boost a bit globally)
     # "Retire qualquer música de fundo" -> Extremely hard without AI like Spleeter.
-    # But aggressive vocal isolation via EQ helps.
-    # To avoid hallucinations during silence, we need to MUTE silence, not remove it (which breaks sync).
-    # We use 'afftdn' (FFT Denoise) to aggressively reduce background noise/music floor.
-    # We can also use a 'compand' filter as a Noise Gate to silence anything below a threshold.
-    # Filter Chain Strategy:
-    # 1. Highpass (300Hz) - Cut bass/rumble/drums
-    # 2. Lowpass (3000Hz) - Cut sibilance/high-hats
-    # 3. AFFTDN - Spectral noise reduction (aggressive)
-    # 4. Dynaudnorm - Normalize volume of speech
-    # Check if we can use afftdn (assuming recent ffmpeg from previous output)
-    # If not, we might fail, but let's try.
-    filters = "highpass=f=300,lowpass=f=3000,afftdn=nr=30:nf=-25,dynaudnorm=f=150:g=15"
     try:
         command = [
@@ -364,7 +359,7 @@ def process_audio_for_transcription(input_file: str) -> str:
             output_file
         ]
-        print(f"🔊 Processando áudio com FFmpeg (Noise Gate/Spectral Cleaning): {' '.join(command)}")
         subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         if os.path.exists(output_file):
@@ -373,4 +368,4 @@ def process_audio_for_transcription(input_file: str) -> str:
     except Exception as e:
         print(f"⚠️ Falha ao processar áudio: {e}")
-    return input_file

     # 4. volume=1.5 (Boost a bit globally)
     # "Retire qualquer música de fundo" -> Extremely hard without AI like Spleeter.
+    # To reduce music impact without destroying voice, we use a Gentler Vocal EQ.
+    # We essentially attenuate frequencies where music dominates (Sub-bass, huge highs)
+    # and normalize volume using EBU R128 (loudnorm) which is more natural than dynaudnorm.
+    # Filter Chain Strategy (Simplified):
+    # 1. Highpass (200Hz) - Cut rumble/bass lines
+    # 2. Lowpass (8000Hz) - Keep up to 8kHz for clarity (s, t, p sounds), cut cymbal shimmer.
+    # 3. Loudnorm - Standardize volume without aggressive pumping.
+    filters = "highpass=f=200,lowpass=f=8000,loudnorm"
     try:
         command = [
             output_file
         ]
+        print(f"🔊 Processando áudio com FFmpeg (Gentle EQ + Loudnorm): {' '.join(command)}")
         subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
         if os.path.exists(output_file):
     except Exception as e:
         print(f"⚠️ Falha ao processar áudio: {e}")
+    return input_file