Add Kimi-Audio EOS and pad token ids c69ab05 verified
Tung-Lin Wu committed on
How to use moonshotai/Kimi-Audio-7B-Instruct with KimiAudio:
# Example usage for KimiAudio
# pip install git+https://github.com/MoonshotAI/Kimi-Audio.git
from kimia_infer.api.kimia import KimiAudio
# `sf.write` is used below to save the generated waveform; without this
# import the script fails with NameError at the very last step.
import soundfile as sf

# Load the instruct checkpoint; the detokenizer is requested because the
# Q&A example below asks for audio output as well as text.
model = KimiAudio(
    model_path="moonshotai/Kimi-Audio-7B-Instruct",
    load_detokenizer=True,
)

# Sampling settings shared by both examples; forwarded to `generate` as
# keyword arguments.
sampling_params = {
    "audio_temperature": 0.8,
    "audio_top_k": 10,
    "text_temperature": 0.0,
    "text_top_k": 5,
}

# For ASR
# A text instruction followed by the audio file to transcribe.
asr_audio = "asr_example.wav"
messages_asr = [
    {
        "role": "user",
        "message_type": "text",
        "content": "Please transcribe the following audio:",
    },
    {"role": "user", "message_type": "audio", "content": asr_audio},
]
# `generate` returns a pair; only the text half is used for text output.
_, text = model.generate(messages_asr, **sampling_params, output_type="text")
print(text)

# For Q&A
# A single audio message; the model is asked to answer with audio and text.
qa_audio = "qa_example.wav"
messages_conv = [
    {"role": "user", "message_type": "audio", "content": qa_audio},
]
wav, text = model.generate(messages_conv, **sampling_params, output_type="both")
# Move the waveform to the CPU, flatten it to 1-D, and write it out with
# 24000 passed as the sample rate.
sf.write("output_audio.wav", wav.cpu().view(-1).numpy(), 24000)
print(text)