Spaces:

habulaj
/

subapi

Running

App Files Files Community

habulaj committed 16 days ago

Commit

fdb9ed2

verified ·

1 Parent(s): 4aa5e54

Update srt_utils.py

Browse files

Files changed (1) hide show

srt_utils.py +94 -90

srt_utils.py CHANGED Viewed

@@ -298,107 +298,111 @@ import subprocess
 import shutil
 import os
def process_audio_for_transcription(input_file: str) -> str:
    """
    Process audio to maximize speech clarity using Demucs (AI Source Separation).

    Demucs is run in two-stem mode so only the vocals stem is kept. When FFmpeg
    is available, the vocals are then re-encoded as a mono 16 kHz MP3 with a
    voice-enhancement filter chain applied.

    Args:
        input_file: Path to the input audio.

    Returns:
        Path to processed .mp3 file (vocals). Fallbacks, in order:
        - the raw Demucs vocals if FFmpeg is missing or the FFmpeg pass fails;
        - the original ``input_file`` if Demucs could not produce the vocals.
    """
    print("🔊 [Demucs] Iniciando isolamento de voz via AI...")

    # Output directory for demucs
    output_dir = os.path.join("static", "separated")
    os.makedirs(output_dir, exist_ok=True)

    # Prefer the resolved executable; otherwise pass the bare name and let the
    # OS lookup fail inside the try below, which triggers the fallback.
    demucs_cmd = shutil.which("demucs") or "demucs"

    # NOTE: First run will download model (~100MB+).
    model = "htdemucs"
    command = [
        demucs_cmd,
        "--two-stems=vocals",  # only vocals/other are separated
        "-n", model,
        "-d", "cpu",
        "--mp3",  # output as MP3 directly
        "--mp3-bitrate", "128",
        input_file,
        "-o", output_dir,
    ]

    # Demucs output structure: output_dir / model_name / input_filename_no_ext / vocals.mp3
    input_stem = os.path.splitext(os.path.basename(input_file))[0]
    song_dir = os.path.join(output_dir, model, input_stem)
    vocals_path = os.path.join(song_dir, "vocals.mp3")

    vocals_ready = False
    try:
        print(f"🔊 Executando Demucs: {' '.join(command)}")
        # Capture output for debugging
        result = subprocess.run(
            command,
            check=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            print(f"⚠️ Erro no Demucs (Code {result.returncode}):")
            print(f"STDOUT: {result.stdout}")
            print(f"STDERR: {result.stderr}")
        else:
            vocals_ready = os.path.exists(vocals_path)
    except Exception as e:
        # Demucs is best-effort: any failure here must fall back to the
        # original audio instead of propagating to the caller.
        print(f"⚠️ Falha no Demucs: {e}")
        import traceback
        traceback.print_exc()

    if not vocals_ready:
        print("⚠️ Retornando arquivo original (fallback)")
        return input_file

    print(f"✅ Demucs sucesso: {vocals_path}")

    ffmpeg_cmd = shutil.which("ffmpeg")
    if not ffmpeg_cmd:
        return vocals_path

    # Compress to mono mp3 16k with aggressive voice enhancement.
    # Filters include highpass, noise reduction, compression, EQ, and normalization.
    final_output = input_file + ".vocals.mp3"
    filter_chain = (
        "highpass=f=100,"
        "afftdn=nr=10:nf=-50:tn=1,"
        "compand=attacks=0:points=-80/-90|-45/-25|-27/-9|0/-7:gain=5,"
        "equalizer=f=3000:width_type=h:width=1000:g=5,"
        "loudnorm"
    )
    cmd_convert = [
        ffmpeg_cmd, "-y",
        "-i", vocals_path,
        "-ac", "1", "-ar", "16000",
        "-af", filter_chain,
        "-c:a", "libmp3lame", "-q:a", "2",
        final_output,
    ]
    try:
        subprocess.run(
            cmd_convert,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.SubprocessError, OSError) as e:
        # The separated vocals are still usable, so keep them (and their
        # folder) rather than discarding the Demucs result.
        print(f"⚠️ Erro no FFmpeg: {e}")
        return vocals_path

    # Cleanup demucs folder; a failed cleanup must not invalidate the result.
    shutil.rmtree(song_dir, ignore_errors=True)
    return final_output

 import shutil
 import os
def process_audio_for_transcription(input_file: str, has_bg_music: bool = False) -> str:
    """
    Process audio to maximize speech clarity.

    The audio is optionally run through Demucs to isolate the vocals, and is
    then always re-encoded with FFmpeg as a mono 16 kHz MP3 with a
    voice-enhancement filter chain applied.

    Args:
        input_file: Path to input audio
        has_bg_music: If True, uses Demucs to remove background music (slow).
                      If False, skips Demucs but applies voice enhancement filters (fast).

    Returns:
        Path to the processed .mp3 file under ``static/processed``. If FFmpeg
        is not installed, ``input_file`` is returned untouched. If the FFmpeg
        pass fails, the best audio available is returned: the Demucs vocals
        when they were produced, otherwise ``input_file``.
    """
    # Output directory for processed files
    output_dir = os.path.join("static", "processed")
    os.makedirs(output_dir, exist_ok=True)

    input_stem = os.path.splitext(os.path.basename(input_file))[0]
    final_output = os.path.join(output_dir, f"{input_stem}.processed.mp3")

    ffmpeg_cmd = shutil.which("ffmpeg")
    if not ffmpeg_cmd:
        print("⚠️ FFmpeg não encontrado!")
        return input_file

    vocals_path = input_file
    # Set only when Demucs really produced the vocals stem. Cleanup relies on
    # this instead of inspecting the path text, so a caller's own directory is
    # never removed just because its name happens to contain "separated".
    demucs_song_dir = None

    # 1. Background Music Removal (Demucs) - OPTIONAL
    if has_bg_music:
        print("🔊 [Demucs] Iniciando isolamento de voz via AI (has_bg_music=True)...")
        demucs_output_dir = os.path.join("static", "separated")
        os.makedirs(demucs_output_dir, exist_ok=True)

        # Prefer the resolved executable; otherwise pass the bare name and let
        # the OS lookup fail inside the try below.
        demucs_cmd = shutil.which("demucs") or "demucs"
        model = "htdemucs"
        command = [
            demucs_cmd,
            "--two-stems=vocals",
            "-n", model,
            "-d", "cpu",
            "--mp3",
            "--mp3-bitrate", "128",
            input_file,
            "-o", demucs_output_dir,
        ]
        try:
            print("🔊 Executando Demucs...")
            result = subprocess.run(
                command,
                check=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            if result.returncode == 0:
                # Path: output_dir / model_name / input_filename_no_ext / vocals.mp3
                song_dir = os.path.join(demucs_output_dir, model, input_stem)
                demucs_vocals = os.path.join(song_dir, "vocals.mp3")
                if os.path.exists(demucs_vocals):
                    print(f"✅ Demucs sucesso: {demucs_vocals}")
                    vocals_path = demucs_vocals
                    demucs_song_dir = song_dir
                else:
                    print(f"⚠️ Demucs não gerou {demucs_vocals}, continuando com audio original.")
            else:
                print(f"⚠️ Erro no Demucs (Code {result.returncode}), continuando com audio original.")
                print(f"STDERR: {result.stderr}")
        except Exception as e:
            # Demucs is an optional, best-effort step: any failure falls back
            # to enhancing the original audio.
            print(f"⚠️ Falha no Demucs: {e}")
    else:
        print("⏩ [Demucs] Pulando remoção de música (has_bg_music=False).")

    # 2. Voice Enhancement (FFmpeg Filters) - ALWAYS RUN
    print("🔊 [FFmpeg] Aplicando filtros de melhoria de voz...")
    # Compress to mono mp3 16k with aggressive voice enhancement.
    # Filters include highpass, noise reduction, compression, EQ, and normalization.
    filter_chain = (
        "highpass=f=100,"
        "afftdn=nr=10:nf=-50:tn=1,"
        "compand=attacks=0:points=-80/-90|-45/-25|-27/-9|0/-7:gain=5,"
        "equalizer=f=3000:width_type=h:width=1000:g=5,"
        "loudnorm"
    )
    cmd_convert = [
        ffmpeg_cmd, "-y",
        "-i", vocals_path,
        "-ac", "1", "-ar", "16000",
        "-af", filter_chain,
        "-c:a", "libmp3lame", "-q:a", "2",
        final_output,
    ]
    try:
        # stderr is captured (not discarded) so a failure can be diagnosed;
        # errors="replace" keeps odd bytes in FFmpeg's output from raising.
        subprocess.run(
            cmd_convert,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
    except (subprocess.SubprocessError, OSError) as e:
        print(f"⚠️ Erro no FFmpeg: {e}")
        detail = (getattr(e, "stderr", None) or "").strip()
        if detail:
            print(f"STDERR: {detail[-2000:]}")
        # Keep the Demucs folder: vocals_path may point inside it.
        return vocals_path

    # Cleanup the Demucs song folder if it was used; a failed cleanup must not
    # invalidate the processed file.
    if demucs_song_dir is not None:
        shutil.rmtree(demucs_song_dir, ignore_errors=True)
    return final_output