Spaces:

habulaj
/

subapi

Running

App Files Community

habulaj committed on Jan 27

Commit

f2b004b

verified ·

1 Parent(s): d8ef4f0

Update srt_utils.py

Browse files

Files changed (1) hide show

srt_utils.py +11 -9

srt_utils.py CHANGED Viewed

@@ -333,7 +333,9 @@ def process_audio_for_transcription(input_file: str) -> str:
             demucs_cmd,
             "--two-stems=vocals",
             "-n", model,
-            "-d", "cpu",
             input_file,
             "-o", output_dir
         ]
@@ -348,24 +350,24 @@ def process_audio_for_transcription(input_file: str) -> str:
             print(f"STDERR: {result.stderr}")
             # Fallback will trigger below
         else:
-            # Demucs output structure: output_dir / model_name / input_filename_no_ext / vocals.wav
             input_filename = os.path.basename(input_file)
             input_stem = os.path.splitext(input_filename)[0]
-            vocals_path = os.path.join(output_dir, model, input_stem, "vocals.wav")
             if os.path.exists(vocals_path):
                 print(f"✅ Demucs sucesso: {vocals_path}")
-                # Convert Wav to MP3 to save space/bandwidth if needed,
-                # OR just return the wav if Groq supports it (Groq supports wav).
-                # Let's convert to MP3 16kHz mono to optimize upload to Groq
                 final_output = input_file + ".vocals.mp3"
                 ffmpeg_cmd = shutil.which("ffmpeg")
                 if ffmpeg_cmd:
-                     # Compress to mono mp3
                      cmd_convert = [
                         ffmpeg_cmd, "-y",
                         "-i", vocals_path,
@@ -375,7 +377,7 @@ def process_audio_for_transcription(input_file: str) -> str:
                      ]
                      subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-                     # Cleanup demucs folder? Maybe keep for cache but better safe space
                      try: shutil.rmtree(os.path.join(output_dir, model, input_stem))
                      except: pass
@@ -389,4 +391,4 @@ def process_audio_for_transcription(input_file: str) -> str:
         traceback.print_exc()
     print("⚠️ Retornando arquivo original (fallback)")
-    return input_file

             demucs_cmd,
             "--two-stems=vocals",
             "-n", model,
+            "-d", "cpu",
+            "--mp3", # Output as MP3 directly
+            "--mp3-bitrate", "128",
             input_file,
             "-o", output_dir
         ]
             print(f"STDERR: {result.stderr}")
             # Fallback will trigger below
         else:
+            # Demucs output structure: output_dir / model_name / input_filename_no_ext / vocals.mp3 (NOTE: .mp3 now)
             input_filename = os.path.basename(input_file)
             input_stem = os.path.splitext(input_filename)[0]
+            # Check for mp3
+            vocals_path = os.path.join(output_dir, model, input_stem, "vocals.mp3")
             if os.path.exists(vocals_path):
                 print(f"✅ Demucs sucesso: {vocals_path}")
+                # Resample to 16k just to be sure and mono? Demucs output might be stereo 44.1k
+                # Groq takes mp3 fine, but 16k mono is smaller/faster.
                 final_output = input_file + ".vocals.mp3"
                 ffmpeg_cmd = shutil.which("ffmpeg")
                 if ffmpeg_cmd:
+                     # Compress to mono mp3 16k
                      cmd_convert = [
                         ffmpeg_cmd, "-y",
                         "-i", vocals_path,
                      ]
                      subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                     # Cleanup demucs folder
                      try: shutil.rmtree(os.path.join(output_dir, model, input_stem))
                      except: pass
         traceback.print_exc()
     print("⚠️ Retornando arquivo original (fallback)")
+    return input_file