"""
Simple example of using the pre-quantized VibeVoice model.
No need for on-the-fly quantization - loads much faster!
"""

import os

import torch

from vibevoice.modular.modeling_vibevoice_inference import VibeVoiceForConditionalGenerationInference
from vibevoice.processor.vibevoice_processor import VibeVoiceProcessor
def main(
    model_path: str = "/home/deveraux/Desktop/vibevoice/VibeVoice-Large-4bit",
    voices_dir: str = "/home/deveraux/Desktop/vibevoice/VibeVoice-main/demo/voices",
    output_path: str = "quantized_output.wav",
) -> None:
    """Load a pre-quantized 4-bit VibeVoice model and synthesize a short
    two-speaker dialogue to a WAV file.

    Args:
        model_path: Directory containing the pre-quantized model weights
            and the processor configuration.
        voices_dir: Directory holding the reference speaker WAV files
            (``en-Alice_woman.wav`` and ``en-Carter_man.wav`` are expected).
        output_path: Destination path for the generated audio file.
    """
    print("Loading pre-quantized VibeVoice 4-bit model...")

    # The processor handles text tokenization and voice-sample preparation.
    processor = VibeVoiceProcessor.from_pretrained(model_path)

    # Weights on disk are already quantized, so no on-the-fly quantization
    # pass is needed — loading is much faster than quantizing at startup.
    model = VibeVoiceForConditionalGenerationInference.from_pretrained(
        model_path,
        device_map='cuda',
        torch_dtype=torch.bfloat16,
    )
    model.eval()

    # Rough GPU footprint of the loaded model (decimal GB).
    memory_gb = torch.cuda.memory_allocated() / 1e9
    print(f"✅ Model loaded! Memory usage: {memory_gb:.1f} GB")

    # Dialogue script: "Speaker N:" prefixes map utterances to the
    # reference voices below, in order.
    text = "Speaker 1: Welcome to our podcast! Speaker 2: Thanks for having me!"

    # One reference WAV per speaker; order matches the speaker numbering.
    speaker_voices = [
        os.path.join(voices_dir, "en-Alice_woman.wav"),
        os.path.join(voices_dir, "en-Carter_man.wav"),
    ]

    inputs = processor(
        text=[text],
        voice_samples=[speaker_voices],
        padding=True,
        return_tensors="pt",
        return_attention_mask=True,
    )

    print(f"\nGenerating: '{text}'")
    # Inference only — skip autograd bookkeeping to reduce memory use.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=None,  # no hard cap; model decides when to stop
            cfg_scale=1.3,  # presumably classifier-free guidance scale — confirm
            tokenizer=processor.tokenizer,
            generation_config={'do_sample': False},  # deterministic decoding
        )

    processor.save_audio(outputs.speech_outputs[0], output_path=output_path)
    print(f"✅ Audio saved to: {output_path}")
| |
|
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()