bezzam HF Staff committed on
Commit
ec5c05c
·
verified ·
1 Parent(s): cd7c311

Upload processor

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set system_prompt = system_prompt | default("You are a helpful assistant that transcribes audio input into text output in JSON format.") -%}
2
+ {{ system_prompt }}
3
+ {%- set audio_token = audio_token | default("<|speech_pad|>") %}
4
+ {%- set audio_start_token = "<|speech_start|>" %}
5
+ {%- set audio_end_token = "<|speech_end|>" %}
6
+ {%- for message in messages %}
7
+ {%- if message['role'] == 'user' %}
8
+ {%- for item in message['content'] %}
9
+ {%- if item['type'] == 'audio' %}
10
+ {{ audio_start_token }}{{ audio_token }}{{ audio_end_token }}
11
+ {{ "
12
+ " }}{%- if item.get('context_info') %}This is a <|AUDIO_DURATION|> seconds audio, with extra info: {{ item['context_info'] }}
13
+
14
+ Please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- else %}This is a <|AUDIO_DURATION|> seconds audio, please transcribe it with these keys: Start time, End time, Speaker ID, Content{%- endif %}
15
+ {%- elif item['type'] == 'text' %}
16
+ {{ item['text'] }}
17
+ {%- endif %}
18
+ {%- endfor %}
19
+ {%- endif %}
20
+ {%- endfor %}
processor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_extractor": {
3
+ "eps": 1e-06,
4
+ "feature_extractor_type": "VibeVoiceAcousticTokenizerFeatureExtractor",
5
+ "feature_size": 1,
6
+ "normalize_audio": true,
7
+ "padding_side": "right",
8
+ "padding_value": 0.0,
9
+ "return_attention_mask": true,
10
+ "sampling_rate": 24000,
11
+ "target_dB_FS": -25
12
+ },
13
+ "processor_class": "VibeVoiceAsrProcessor"
14
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8
3
+ size 11421892
tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|endoftext|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": false,
24
+ "model_max_length": 131072,
25
+ "pad_token": "<|endoftext|>",
26
+ "processor_class": "VibeVoiceAsrProcessor",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "Qwen2Tokenizer",
29
+ "unk_token": null
30
+ }