Update srt_utils.py
Browse files- srt_utils.py +11 -9
srt_utils.py
CHANGED
|
@@ -333,7 +333,9 @@ def process_audio_for_transcription(input_file: str) -> str:
|
|
| 333 |
demucs_cmd,
|
| 334 |
"--two-stems=vocals",
|
| 335 |
"-n", model,
|
| 336 |
-
"-d", "cpu",
|
|
|
|
|
|
|
| 337 |
input_file,
|
| 338 |
"-o", output_dir
|
| 339 |
]
|
|
@@ -348,24 +350,24 @@ def process_audio_for_transcription(input_file: str) -> str:
|
|
| 348 |
print(f"STDERR: {result.stderr}")
|
| 349 |
# Fallback will trigger below
|
| 350 |
else:
|
| 351 |
-
# Demucs output structure: output_dir / model_name / input_filename_no_ext / vocals.wav
|
| 352 |
input_filename = os.path.basename(input_file)
|
| 353 |
input_stem = os.path.splitext(input_filename)[0]
|
| 354 |
|
| 355 |
-
|
|
|
|
| 356 |
|
| 357 |
if os.path.exists(vocals_path):
|
| 358 |
print(f"✅ Demucs sucesso: {vocals_path}")
|
| 359 |
|
| 360 |
-
#
|
| 361 |
-
#
|
| 362 |
-
# Let's convert to MP3 16kHz mono to optimize upload to Groq
|
| 363 |
|
| 364 |
final_output = input_file + ".vocals.mp3"
|
| 365 |
|
| 366 |
ffmpeg_cmd = shutil.which("ffmpeg")
|
| 367 |
if ffmpeg_cmd:
|
| 368 |
-
# Compress to mono mp3
|
| 369 |
cmd_convert = [
|
| 370 |
ffmpeg_cmd, "-y",
|
| 371 |
"-i", vocals_path,
|
|
@@ -375,7 +377,7 @@ def process_audio_for_transcription(input_file: str) -> str:
|
|
| 375 |
]
|
| 376 |
subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 377 |
|
| 378 |
-
# Cleanup demucs folder
|
| 379 |
try: shutil.rmtree(os.path.join(output_dir, model, input_stem))
|
| 380 |
except: pass
|
| 381 |
|
|
@@ -389,4 +391,4 @@ def process_audio_for_transcription(input_file: str) -> str:
|
|
| 389 |
traceback.print_exc()
|
| 390 |
|
| 391 |
print("⚠️ Retornando arquivo original (fallback)")
|
| 392 |
-
return input_file
|
|
|
|
| 333 |
demucs_cmd,
|
| 334 |
"--two-stems=vocals",
|
| 335 |
"-n", model,
|
| 336 |
+
"-d", "cpu",
|
| 337 |
+
"--mp3", # Output as MP3 directly
|
| 338 |
+
"--mp3-bitrate", "128",
|
| 339 |
input_file,
|
| 340 |
"-o", output_dir
|
| 341 |
]
|
|
|
|
| 350 |
print(f"STDERR: {result.stderr}")
|
| 351 |
# Fallback will trigger below
|
| 352 |
else:
|
| 353 |
+
# Demucs output structure: output_dir / model_name / input_filename_no_ext / vocals.mp3 (NOTE: .mp3 now)
|
| 354 |
input_filename = os.path.basename(input_file)
|
| 355 |
input_stem = os.path.splitext(input_filename)[0]
|
| 356 |
|
| 357 |
+
# Check for mp3
|
| 358 |
+
vocals_path = os.path.join(output_dir, model, input_stem, "vocals.mp3")
|
| 359 |
|
| 360 |
if os.path.exists(vocals_path):
|
| 361 |
print(f"✅ Demucs sucesso: {vocals_path}")
|
| 362 |
|
| 363 |
+
# Resample to 16 kHz mono to be safe: the Demucs output might be stereo at 44.1 kHz.
|
| 364 |
+
# Groq takes mp3 fine, but 16k mono is smaller/faster.
|
|
|
|
| 365 |
|
| 366 |
final_output = input_file + ".vocals.mp3"
|
| 367 |
|
| 368 |
ffmpeg_cmd = shutil.which("ffmpeg")
|
| 369 |
if ffmpeg_cmd:
|
| 370 |
+
# Compress to mono mp3 16k
|
| 371 |
cmd_convert = [
|
| 372 |
ffmpeg_cmd, "-y",
|
| 373 |
"-i", vocals_path,
|
|
|
|
| 377 |
]
|
| 378 |
subprocess.run(cmd_convert, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 379 |
|
| 380 |
+
# Cleanup demucs folder
|
| 381 |
try: shutil.rmtree(os.path.join(output_dir, model, input_stem))
|
| 382 |
except: pass
|
| 383 |
|
|
|
|
| 391 |
traceback.print_exc()
|
| 392 |
|
| 393 |
print("⚠️ Retornando arquivo original (fallback)")
|
| 394 |
+
return input_file
|