{#-
  Chat template that builds an audio-transcription prompt.

  Variables read:
    messages      - iterated in order; each message is indexed by 'role' and
                    'content'. 'content' is iterated as a sequence of items
                    that carry a 'type' key ('audio' or 'text'; text items
                    also carry 'text').
    system_prompt - optional; falls back to the default string below.
    audio_token   - optional; falls back to "<|box_start|>".

  Output: one system turn, then one user turn for every message whose role
  is 'user'. Messages with any other role produce no output.

  All comments use whitespace-stripping delimiters and sit only next to tags
  that already strip whitespace, so they never add characters to the result.
-#}
{%- set system_prompt = system_prompt | default("You are a helpful assistant that transcribes audio input into text output in JSON format.") -%}
<|im_start|>system
{{ system_prompt }}<|im_end|>
{#- The audio placeholder is emitted as start marker + audio_token + end marker. -#}
{%- set audio_token = audio_token | default("<|box_start|>") -%}
{%- set audio_start_token = "<|object_ref_start|>" -%}
{%- set audio_end_token = "<|object_ref_end|>" -%}
{%- for message in messages -%}
{%- if message['role'] == 'user' -%}
{{ '\n' }}<|im_start|>user{{ '\n' }}
{#- Only the first 'text' item is used as extra context; later text items are ignored. -#}
{%- set text_items = message['content'] | selectattr('type', 'equalto', 'text') | list -%}
{%- set context_text = text_items[0]['text'] if text_items else none -%}
{%- for item in message['content'] -%}
{#- Only 'audio' items emit output; each one gets the placeholder plus the instruction text. -#}
{%- if item['type'] == 'audio' -%}
{{ audio_start_token }}{{ audio_token }}{{ audio_end_token }}{{ '\n' }}
{%- if context_text -%}
This is a <|AUDIO_DURATION|> seconds audio, with extra info: {{ context_text }}
Please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- else -%}
This is a <|AUDIO_DURATION|> seconds audio, please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- endif -%}
{%- endif -%}
{%- endfor -%}
<|im_end|>{{ '\n' }}
{%- endif -%}
{%- endfor -%}