| """VibeVoice vLLM Plugin - Registers VibeVoice model for vLLM inference. |
| |
| This plugin enables VibeVoice ASR models to be loaded and served through vLLM. |
| It registers the model architecture, configuration, tokenizer, and processor |
| with their respective registries. |
| |
| The plugin is automatically loaded by vLLM via the 'vllm.general_plugins' |
| entry point defined in pyproject.toml. |
| """ |
|
|
| from vllm.model_executor.models import ModelRegistry |
| from transformers import AutoConfig, AutoTokenizer, Qwen2Tokenizer, AutoProcessor, Qwen2AudioProcessor |
|
|
| from vibevoice.modular.configuration_vibevoice import VibeVoiceConfig |
| from vibevoice.modular.modular_vibevoice_text_tokenizer import VibeVoiceASRTextTokenizerFast |
|
|
| from .model import VibeVoiceForCausalLM |
|
|
|
|
| def register_vibevoice(): |
| """Register VibeVoice model with vLLM and transformers. |
| |
| This function is called automatically by vLLM through the entry point |
| mechanism. It registers: |
| - VibeVoiceConfig with AutoConfig |
| - VibeVoiceASRTextTokenizerFast with AutoTokenizer (for ASR) |
| - Qwen2AudioProcessor with AutoProcessor |
| - VibeVoiceForCausalLM with vLLM ModelRegistry |
| """ |
| |
| AutoConfig.register("vibevoice", VibeVoiceConfig) |
|
|
| |
| |
| |
| |
| |
| |
| |
| try: |
| AutoTokenizer.register( |
| VibeVoiceConfig, |
| slow_tokenizer_class=Qwen2Tokenizer, |
| fast_tokenizer_class=VibeVoiceASRTextTokenizerFast, |
| ) |
| except Exception: |
| pass |
|
|
| |
| try: |
| AutoProcessor.register(VibeVoiceConfig, processor_class=Qwen2AudioProcessor) |
| except Exception: |
| pass |
|
|
| |
| |
| ModelRegistry.register_model("VibeVoice", VibeVoiceForCausalLM) |
| ModelRegistry.register_model("VibeVoiceForASRTraining", VibeVoiceForCausalLM) |
|
|
|
|
| |
| |
|
|