habulaj committed on
Commit
f2b004b
·
verified ·
1 Parent(s): d8ef4f0

Update srt_utils.py

Browse files
Files changed (1) hide show
  1. srt_utils.py +11 -9
srt_utils.py CHANGED
@@ -333,7 +333,9 @@ def process_audio_for_transcription(input_file: str) -> str:
333
  demucs_cmd,
334
  "--two-stems=vocals",
335
  "-n", model,
336
- "-d", "cpu",
 
 
337
  input_file,
338
  "-o", output_dir
339
  ]
@@ -348,24 +350,24 @@ def process_audio_for_transcription(input_file: str) -> str:
348
  print(f"STDERR: {result.stderr}")
349
  # Fallback will trigger below
350
  else:
351
- # Demucs output structure: output_dir / model_name / input_filename_no_ext / vocals.wav
352
  input_filename = os.path.basename(input_file)
353
  input_stem = os.path.splitext(input_filename)[0]
354
 
355
- vocals_path = os.path.join(output_dir, model, input_stem, "vocals.wav")
 
356
 
357
  if os.path.exists(vocals_path):
358
  print(f"✅ Demucs sucesso: {vocals_path}")
359
 
360
- # Convert Wav to MP3 to save space/bandwidth if needed,
361
- # OR just return the wav if Groq supports it (Groq supports wav).
362
- # Let's convert to MP3 16kHz mono to optimize upload to Groq
363
 
364
  final_output = input_file + ".vocals.mp3"
365
 
366
  ffmpeg_cmd = shutil.which("ffmpeg")
367
  if ffmpeg_cmd:
368
- # Compress to mono mp3
369
  cmd_convert = [
370
  ffmpeg_cmd, "-y",
371
  "-i", vocals_path,
@@ -375,7 +377,7 @@ def process_audio_for_transcription(input_file: str) -> str:
375
  ]
376
  subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
377
 
378
- # Cleanup demucs folder? Maybe keep for cache but better safe space
379
  try: shutil.rmtree(os.path.join(output_dir, model, input_stem))
380
  except: pass
381
 
@@ -389,4 +391,4 @@ def process_audio_for_transcription(input_file: str) -> str:
389
  traceback.print_exc()
390
 
391
  print("⚠️ Retornando arquivo original (fallback)")
392
- return input_file
 
333
  demucs_cmd,
334
  "--two-stems=vocals",
335
  "-n", model,
336
+ "-d", "cpu",
337
+ "--mp3", # Output as MP3 directly
338
+ "--mp3-bitrate", "128",
339
  input_file,
340
  "-o", output_dir
341
  ]
 
350
  print(f"STDERR: {result.stderr}")
351
  # Fallback will trigger below
352
  else:
353
+ # Demucs output structure: output_dir / model_name / input_filename_no_ext / vocals.mp3 (NOTE: .mp3 now)
354
  input_filename = os.path.basename(input_file)
355
  input_stem = os.path.splitext(input_filename)[0]
356
 
357
+ # Check for mp3
358
+ vocals_path = os.path.join(output_dir, model, input_stem, "vocals.mp3")
359
 
360
  if os.path.exists(vocals_path):
361
  print(f"✅ Demucs sucesso: {vocals_path}")
362
 
363
+ # Resample to 16k just to be sure and mono? Demucs output might be stereo 44.1k
364
+ # Groq takes mp3 fine, but 16k mono is smaller/faster.
 
365
 
366
  final_output = input_file + ".vocals.mp3"
367
 
368
  ffmpeg_cmd = shutil.which("ffmpeg")
369
  if ffmpeg_cmd:
370
+ # Compress to mono mp3 16k
371
  cmd_convert = [
372
  ffmpeg_cmd, "-y",
373
  "-i", vocals_path,
 
377
  ]
378
  subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
379
 
380
+ # Cleanup demucs folder
381
  try: shutil.rmtree(os.path.join(output_dir, model, input_stem))
382
  except: pass
383
 
 
391
  traceback.print_exc()
392
 
393
  print("⚠️ Retornando arquivo original (fallback)")
394
+ return input_file