VibeVoice-Large-8bit-quantized-test2 / load_quantized_8bit.py
debi1234's picture
Upload stable 8-bit quantized VibeVoice-Large with outlier detection
1800268 verified
#!/usr/bin/env python
"""
Load and use the 8-bit quantized VibeVoice model
"""
import torch
from transformers import BitsAndBytesConfig
from vibevoice.modular.modeling_vibevoice_inference import VibeVoiceForConditionalGenerationInference
from vibevoice.processor.vibevoice_processor import VibeVoiceProcessor
def load_quantized_model(model_path: str = "/kaggle/working/quantized_8bit_FIXED"):
    """Load the pre-quantized 8-bit VibeVoice model and its processor.

    Args:
        model_path: Directory (or hub id) passed to ``from_pretrained`` for
            both the processor and the model.

    Returns:
        A ``(model, processor)`` tuple. The model has been put in eval mode
        and was loaded with ``device_map='cuda'``.
    """
    print("Loading 8-bit quantized VibeVoice model...")

    # 8-bit (LLM.int8) quantization has no compute-dtype option: the
    # ``bnb_*_compute_dtype`` setting exists only for 4-bit
    # (``bnb_4bit_compute_dtype``). The previous ``bnb_8bit_compute_dtype``
    # keyword was not a recognised option, so it is dropped here. The dtype of
    # the non-quantized modules is set by ``torch_dtype`` below.
    bnb_config = BitsAndBytesConfig(load_in_8bit=True)

    processor = VibeVoiceProcessor.from_pretrained(model_path)

    model = VibeVoiceForConditionalGenerationInference.from_pretrained(
        model_path,
        quantization_config=bnb_config,
        device_map='cuda',
        torch_dtype=torch.bfloat16,
    )
    # Inference only: switch off training-mode behaviour.
    model.eval()

    print("✅ Model loaded successfully!")
    # Reports memory currently allocated by tensors on the current CUDA device.
    print(f"💾 Memory usage: {torch.cuda.memory_allocated() / 1e9:.1f} GB")

    return model, processor
if __name__ == "__main__":
    # Example usage: synthesize a short two-speaker dialogue to output.wav.
    model, processor = load_quantized_model()

    text = "Speaker 1: Hello! Speaker 2: Hi there!"

    # One list of reference voice samples per script; replace the placeholder
    # paths with real audio files before running.
    inputs = processor(
        text=[text],
        voice_samples=[["path/to/voice1.wav", "path/to/voice2.wav"]],
        padding=True,
        return_tensors="pt",
    )

    # The processor returns CPU tensors while the model lives on the GPU;
    # move every tensor entry to the model's device so generate() does not
    # hit a device mismatch. Non-tensor entries are passed through unchanged.
    device = next(model.parameters()).device
    inputs = {
        key: value.to(device) if torch.is_tensor(value) else value
        for key, value in inputs.items()
    }

    # NOTE(review): upstream VibeVoice demo scripts also pass
    # ``tokenizer=processor.tokenizer`` (and a cfg_scale) to generate();
    # confirm whether this model's generate() requires it.
    with torch.no_grad():
        outputs = model.generate(**inputs)

    processor.save_audio(outputs.speech_outputs[0], "output.wav")