microsoft
/

VibeVoice-ASR-HF

Audio-Text-to-Text

automatic-speech-recognition

Model card Files Files and versions

VibeVoice-ASR-HF / chat_template.jinja

frontierai's picture

Initial commit

8623f43 verified 1 day ago

history blame contribute delete

1.24 kB

	{%- set system_prompt = system_prompt \| default("You are a helpful assistant that transcribes audio input into text output in JSON format.") -%}
	<\|im_start\|>system
	{{ system_prompt }}<\|im_end\|>
	{%- set audio_token = audio_token \| default("<\|box_start\|>") -%}
	{%- set audio_start_token = "<\|object_ref_start\|>" -%}
	{%- set audio_end_token = "<\|object_ref_end\|>" -%}
	{%- for message in messages -%}
	{%- if message['role'] == 'user' -%}
	{{ '
	' }}<\|im_start\|>user{{ '
	' }}{%- set text_items = message['content'] \| selectattr('type', 'equalto', 'text') \| list -%}
	{%- set context_text = text_items[0]['text'] if text_items else none -%}
	{%- for item in message['content'] -%}
	{%- if item['type'] == 'audio' -%}
	{{ audio_start_token }}{{ audio_token }}{{ audio_end_token }}{{ "
	" }}{%- if context_text -%}
	This is a <\|AUDIO_DURATION\|> seconds audio, with extra info: {{ context_text }}

	Please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- else -%}
	This is a <\|AUDIO_DURATION\|> seconds audio, please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- endif -%}
	{%- endif -%}
	{%- endfor -%}
	<\|im_end\|>{{ '
	' }}
	{%- endif -%}
	{%- endfor -%}