Spaces:
Running
Running
futurespyhi
committed on
Commit
·
3dfa434
1
Parent(s):
28df979
Enhance audio generation debugging and error handling
Browse files
- Add comprehensive debugging output for audio decoding pipeline
- Implement silent audio detection with amplitude checking
- Improve error handling with try/except blocks for robustness
- Expand critical file validation in model download process
- Optimize audio file search priority for better quality output
- Update gitignore to exclude test files and logs
- .gitignore +5 -1
- YuEGP/inference/infer.py +45 -15
- app.py +6 -0
- download_models.py +10 -2
.gitignore
CHANGED
|
@@ -109,6 +109,10 @@ flash_attn/
|
|
| 109 |
packages.txt.backup
|
| 110 |
requirements.txt.backup
|
| 111 |
download_model.py.backup
|
|
|
|
| 112 |
|
| 113 |
# test files
|
| 114 |
-
tests/
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
packages.txt.backup
|
| 110 |
requirements.txt.backup
|
| 111 |
download_model.py.backup
|
| 112 |
+
decode_vocode.txt
|
| 113 |
|
| 114 |
# test files
|
| 115 |
+
tests/
|
| 116 |
+
|
| 117 |
+
# YuE
|
| 118 |
+
YuE/
|
YuEGP/inference/infer.py
CHANGED
|
@@ -192,7 +192,7 @@ def load_audio_mono(filepath, sampling_rate=16000):
|
|
| 192 |
audio = resampler(audio)
|
| 193 |
return audio
|
| 194 |
|
| 195 |
-
|
| 196 |
def encode_audio(codec_model, audio_prompt, device, target_bw=0.5):
|
| 197 |
if len(audio_prompt.shape) < 3:
|
| 198 |
audio_prompt.unsqueeze_(0)
|
|
@@ -511,17 +511,32 @@ recons_mix_dir = os.path.join(recons_output_dir, 'mix')
|
|
| 511 |
os.makedirs(recons_mix_dir, exist_ok=True)
|
| 512 |
tracks = []
|
| 513 |
for npy in stage2_result:
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
# mix tracks
|
| 526 |
for inst_path in tracks:
|
| 527 |
try:
|
|
@@ -533,12 +548,27 @@ for inst_path in tracks:
|
|
| 533 |
continue
|
| 534 |
# mix
|
| 535 |
recons_mix = os.path.join(recons_mix_dir, os.path.basename(inst_path).replace('_itrack', '_mixed'))
|
| 536 |
-
|
| 537 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
mix_stem = (vocal_stem + instrumental_stem) / 1
|
|
|
|
|
|
|
| 539 |
sf.write(recons_mix, mix_stem, sr)
|
|
|
|
| 540 |
except Exception as e:
|
| 541 |
-
print(e)
|
| 542 |
|
| 543 |
# vocoder to upsample audios
|
| 544 |
vocal_decoder, inst_decoder = build_codec_model(args.config_path, args.vocal_decoder_path, args.inst_decoder_path)
|
|
|
|
| 192 |
audio = resampler(audio)
|
| 193 |
return audio
|
| 194 |
|
| 195 |
+
#音频编码
|
| 196 |
def encode_audio(codec_model, audio_prompt, device, target_bw=0.5):
|
| 197 |
if len(audio_prompt.shape) < 3:
|
| 198 |
audio_prompt.unsqueeze_(0)
|
|
|
|
| 511 |
os.makedirs(recons_mix_dir, exist_ok=True)
|
| 512 |
tracks = []
|
| 513 |
for npy in stage2_result:
|
| 514 |
+
try:
|
| 515 |
+
codec_result = np.load(npy)
|
| 516 |
+
print(f"Decoding:{npy}, shape: {codec_result.shape}, dtype: {codec_result.dtype}")
|
| 517 |
+
print(f"π Codec values range: [{codec_result.min()}, {codec_result.max()}]")
|
| 518 |
+
# decodec_rlt = []
|
| 519 |
+
with torch.no_grad():
|
| 520 |
+
codec_tensor = torch.as_tensor(codec_result, dtype=torch.long).unsqueeze(0).permute(1, 0, 2).to(device)
|
| 521 |
+
print(f"π Codec tensor shape: {codec_tensor.shape}")
|
| 522 |
+
|
| 523 |
+
decoded_waveform = codec_model.decode(codec_tensor)
|
| 524 |
+
print(f"π Decoded waveform shape: {decoded_waveform.shape}")
|
| 525 |
+
decoded_waveform = decoded_waveform.cpu().squeeze(0)
|
| 526 |
+
print(f"π Decoded audio range: [{decoded_waveform.min():.6f}, {decoded_waveform.max():.6f}]")
|
| 527 |
+
# decodec_rlt.append(torch.as_tensor(decoded_waveform, device="cpu"))
|
| 528 |
+
# decodec_rlt = torch.cat(decodec_rlt, dim=-1)
|
| 529 |
+
save_path = os.path.join(recons_output_dir, os.path.splitext(os.path.basename(npy))[0] + ".mp3")
|
| 530 |
+
tracks.append(save_path)
|
| 531 |
+
if decoded_waveform.abs().max() < 1e-6:
|
| 532 |
+
print(f"⚠️ WARNING: {npy} decoded to silent audio!")
|
| 533 |
+
else:
|
| 534 |
+
print(f"✅ Audio has sound, max amplitude: {decoded_waveform.abs().max():.6f}")
|
| 535 |
+
save_audio(decoded_waveform, save_path, 16000)
|
| 536 |
+
print(f"Saved Path:{save_path}")
|
| 537 |
+
except Exception as e:
|
| 538 |
+
print(f"Error decoding {npy}:{e}")
|
| 539 |
+
continue
|
| 540 |
# mix tracks
|
| 541 |
for inst_path in tracks:
|
| 542 |
try:
|
|
|
|
| 548 |
continue
|
| 549 |
# mix
|
| 550 |
recons_mix = os.path.join(recons_mix_dir, os.path.basename(inst_path).replace('_itrack', '_mixed'))
|
| 551 |
+
print(f"🎵 Mixing: {os.path.basename(vocal_path)} + {os.path.basename(inst_path)}")
|
| 552 |
+
|
| 553 |
+
vocal_stem, sr = sf.read(vocal_path)
|
| 554 |
+
instrumental_stem, _ = sf.read(inst_path)
|
| 555 |
+
|
| 556 |
+
print(f"π Vocal stem - shape: {vocal_stem.shape}, max: {np.abs(vocal_stem).max():.6f}, sr: {sr}")
|
| 557 |
+
print(f"π Instrumental stem - shape: {instrumental_stem.shape}, max: {np.abs(instrumental_stem).max():.6f}")
|
| 558 |
+
|
| 559 |
+
# Check for silent tracks
|
| 560 |
+
if np.abs(vocal_stem).max() < 1e-6:
|
| 561 |
+
print(f"⚠️ WARNING: Vocal track is silent!")
|
| 562 |
+
if np.abs(instrumental_stem).max() < 1e-6:
|
| 563 |
+
print(f"⚠️ WARNING: Instrumental track is silent!")
|
| 564 |
+
|
| 565 |
mix_stem = (vocal_stem + instrumental_stem) / 1
|
| 566 |
+
print(f"π Mixed stem - max amplitude: {np.abs(mix_stem).max():.6f}")
|
| 567 |
+
|
| 568 |
sf.write(recons_mix, mix_stem, sr)
|
| 569 |
+
print(f"✅ Mixed audio saved: {recons_mix}")
|
| 570 |
except Exception as e:
|
| 571 |
+
print(f"❌ Error mixing tracks {inst_path}: {e}")
|
| 572 |
|
| 573 |
# vocoder to upsample audios
|
| 574 |
vocal_decoder, inst_decoder = build_codec_model(args.config_path, args.vocal_decoder_path, args.inst_decoder_path)
|
app.py
CHANGED
|
@@ -509,6 +509,12 @@ def generate_music_spaces(lyrics: str, genre: str, mood: str, progress=gr.Progre
|
|
| 509 |
# Find generated audio file - prioritize mixed audio from vocoder/mix directory
|
| 510 |
import glob
|
| 511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
# First look for the final mixed audio in vocoder/mix
|
| 513 |
mixed_files = glob.glob(os.path.join(output_dir, "vocoder/mix/*_mixed.mp3"))
|
| 514 |
if mixed_files:
|
|
|
|
| 509 |
# Find generated audio file - prioritize mixed audio from vocoder/mix directory
|
| 510 |
import glob
|
| 511 |
|
| 512 |
+
final_files = glob.glob(os.path.join(output_dir, "*_mixed.mp3"))
|
| 513 |
+
if final_files:
|
| 514 |
+
progress(1.0, desc="Finish music generation")
|
| 515 |
+
print(f"✅ Found audio file at root: {final_files[0]}")
|
| 516 |
+
return final_files[0]
|
| 517 |
+
|
| 518 |
# First look for the final mixed audio in vocoder/mix
|
| 519 |
mixed_files = glob.glob(os.path.join(output_dir, "vocoder/mix/*_mixed.mp3"))
|
| 520 |
if mixed_files:
|
download_models.py
CHANGED
|
@@ -185,11 +185,19 @@ def ensure_model_availability():
|
|
| 185 |
|
| 186 |
xcodec_base = Path("YuEGP/inference/xcodec_mini_infer")
|
| 187 |
|
| 188 |
-
# Check if critical
|
| 189 |
critical_files = [
|
|
|
|
| 190 |
xcodec_base / "decoders" / "decoder_131000.pth",
|
| 191 |
xcodec_base / "decoders" / "decoder_151000.pth",
|
| 192 |
-
xcodec_base / "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
]
|
| 194 |
|
| 195 |
missing_files = [f for f in critical_files if not f.exists()]
|
|
|
|
| 185 |
|
| 186 |
xcodec_base = Path("YuEGP/inference/xcodec_mini_infer")
|
| 187 |
|
| 188 |
+
# Check if critical files exist (both for recons and vocoder stages)
|
| 189 |
critical_files = [
|
| 190 |
+
# Vocoder stage files
|
| 191 |
xcodec_base / "decoders" / "decoder_131000.pth",
|
| 192 |
xcodec_base / "decoders" / "decoder_151000.pth",
|
| 193 |
+
xcodec_base / "decoders" / "config.yaml",
|
| 194 |
+
|
| 195 |
+
# Recons stage files (critical for audio decoding)
|
| 196 |
+
xcodec_base / "final_ckpt" / "ckpt_00360000.pth",
|
| 197 |
+
xcodec_base / "final_ckpt" / "config.yaml",
|
| 198 |
+
|
| 199 |
+
# Python modules
|
| 200 |
+
xcodec_base / "models" / "soundstream_hubert_new.py"
|
| 201 |
]
|
| 202 |
|
| 203 |
missing_files = [f for f in critical_files if not f.exists()]
|