Spaces:

habulaj
/

subapi

Running

App Files Files Community

habulaj committed 19 days ago

Commit

cfa7b66

verified ·

1 Parent(s): 8c42e32

Update srt_utils.py

Browse files

Files changed (1) hide show

srt_utils.py +69 -1

srt_utils.py CHANGED Viewed

@@ -292,4 +292,72 @@ def apply_netflix_style_filter(srt_content):
         output_srt += f"{i}\n{start_time} --> {end_time}\n{formatted_text}\n\n"
-    return output_srt.strip()

         output_srt += f"{i}\n{start_time} --> {end_time}\n{formatted_text}\n\n"
+    return output_srt.strip()
+import subprocess
+import shutil
def process_audio_for_transcription(input_file: str) -> str:
    """
    Pre-process an audio/video file with FFmpeg to improve speech clarity
    before transcription.

    The FFmpeg invocation built below applies:
    1. Video stream removal (-vn)
    2. Resampling to 16 kHz (-ar 16000)
    3. Mono down-mix (-ac 1)
    4. Highpass at 200 Hz to remove low-frequency rumble
    5. Lowpass at 3000 Hz to focus on the speech band
    6. dynaudnorm (Dynamic Audio Normalizer) to boost quiet speech
    7. MP3 encoding with libmp3lame, VBR quality 2

    This is best-effort: if FFmpeg is not on PATH, the FFmpeg run fails, or
    no usable output file is produced, the original path is returned
    unchanged so the caller can still use the raw input.

    Args:
        input_file: Path to the source media file.

    Returns:
        Path to the processed file (``input_file + ".processed.mp3"``) on
        success, otherwise ``input_file``.
    """
    # Imported locally so the function does not depend on a module-level
    # `import os` being present elsewhere in the file.
    import os

    ffmpeg_cmd = shutil.which("ffmpeg")
    if not ffmpeg_cmd:
        print("⚠️ FFmpeg não encontrado. Pulando processamento de áudio.")
        return input_file

    output_file = input_file + ".processed.mp3"

    # Only widely available filters are used. FFT denoising (afftdn) is left
    # out on purpose: it may be missing from some FFmpeg builds and can add
    # artifacts. Removing background music is not attempted either; EQ alone
    # cannot do real source separation, the band-pass merely attenuates
    # content outside the speech band.
    filters = "highpass=f=200,lowpass=f=3000,dynaudnorm=f=150:g=15"

    command = [
        ffmpeg_cmd,
        "-y",  # Overwrite an existing output file
        "-i", input_file,
        "-vn",  # No video
        "-ar", "16000",  # 16kHz
        "-ac", "1",  # Mono
        "-af", filters,
        "-c:a", "libmp3lame",
        "-q:a", "2",  # High quality VBR
        output_file,
    ]
    print(f"🔊 Processando áudio com FFmpeg: {' '.join(command)}")

    try:
        subprocess.run(
            command,
            check=True,
            # FFmpeg reads stdin for interactive commands; detach it so a
            # background run cannot consume or block on the parent's stdin.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            # Keep stderr so a failure can report FFmpeg's own message.
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError as e:
        stderr_text = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        last_line = stderr_text.splitlines()[-1] if stderr_text else ""
        detail = f" ({last_line})" if last_line else ""
        print(f"⚠️ Falha ao processar áudio: {e}{detail}")
    except Exception as e:
        # Deliberately broad: pre-processing is optional and must never
        # prevent the caller from falling back to the raw input.
        print(f"⚠️ Falha ao processar áudio: {e}")
    else:
        if os.path.isfile(output_file) and os.path.getsize(output_file) > 0:
            return output_file

    # Failure path: do not leave a partial or empty output file behind.
    try:
        os.remove(output_file)
    except OSError:
        pass
    return input_file