bezzam
/

VibeVoice-ASR-7B

Audio-Text-to-Text

automatic-speech-recognition

Model card Files Files and versions

bezzam HF Staff committed on Jan 30

Commit

0d33119

·

verified ·

1 Parent(s): 3414b83

Upload processor

Files changed (1) hide show

chat_template.jinja +20 -15

chat_template.jinja CHANGED Viewed

@@ -1,21 +1,26 @@
 {%- set system_prompt = system_prompt | default("You are a helpful assistant that transcribes audio input into text output in JSON format.") -%}
-{{ system_prompt }}
-{%- set audio_token = audio_token | default("<|box_start|>") %}
-{%- set audio_start_token = "<|object_ref_start|>" %}
-{%- set audio_end_token = "<|object_ref_end|>" %}
-{%- for message in messages %}
-    {%- if message['role'] == 'user' %}
-        {%- set text_items = message['content'] | selectattr('type', 'equalto', 'text') | list %}
-        {%- set context_text = text_items[0]['text'] if text_items else none %}
-        {%- for item in message['content'] %}
-            {%- if item['type'] == 'audio' %}
 {{ audio_start_token }}{{ audio_token }}{{ audio_end_token }}{{ "
 " }}{%- if context_text -%}
 This is a <|AUDIO_DURATION|> seconds audio, with extra info: {{ context_text }}
 Please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- else -%}
-This is a <|AUDIO_DURATION|> seconds audio, please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- endif %}
-            {%- endif %}
-        {%- endfor %}
-    {%- endif %}
-{%- endfor %}

 {%- set system_prompt = system_prompt | default("You are a helpful assistant that transcribes audio input into text output in JSON format.") -%}
+<|im_start|>system
+{{ system_prompt }}<|im_end|>
+{%- set audio_token = audio_token | default("<|box_start|>") -%}
+{%- set audio_start_token = "<|object_ref_start|>" -%}
+{%- set audio_end_token = "<|object_ref_end|>" -%}
+{%- for message in messages -%}
+    {%- if message['role'] == 'user' -%}
+{{ '
+' }}<|im_start|>user{{ '
+' }}{%- set text_items = message['content'] | selectattr('type', 'equalto', 'text') | list -%}
+        {%- set context_text = text_items[0]['text'] if text_items else none -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'audio' -%}
 {{ audio_start_token }}{{ audio_token }}{{ audio_end_token }}{{ "
 " }}{%- if context_text -%}
 This is a <|AUDIO_DURATION|> seconds audio, with extra info: {{ context_text }}
 Please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- else -%}
+This is a <|AUDIO_DURATION|> seconds audio, please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- endif -%}
+            {%- endif -%}
+        {%- endfor -%}
+<|im_end|>{{ '
+' }}
+    {%- endif -%}
+{%- endfor -%}