Upload processor

Files changed (4) hide show

chat_template.jinja ADDED Viewed

+{#- to_text(content): flatten a message's content into prompt text.
+    * string content is emitted unchanged;
+    * a non-mapping iterable is walked item by item (see the branches below);
+    * any other value is rendered directly.
+    Callers in this template pipe the result through the trim filter, so only
+    whitespace at the very start or end of the macro output is removed there. -#}
+{%- macro to_text(content) -%}
+{%- if content is string -%}
+{{- content -}}
+{%- elif content is iterable and content is not mapping -%}
+{%- for item in content -%}
+{#- Text part: a mapping whose type is 'text' and which defines a text field. -#}
+{%- if item is mapping and item.type == 'text' and item.text is defined -%}
+{{- item.text -}}
+{#- Audio part: a mapping with type 'audio' or an 'audio' key. It emits a fixed
+    placeholder (begin-of-audio, one pad token, end-of-audio) followed by a
+    user tag. The tag that follows the placeholder line has no left
+    whitespace-strip marker, so the line break after the user tag is kept in
+    the output. NOTE(review): presumably the processor expands the single pad
+    token to the real audio length; confirm against the processor code. -#}
+{%- elif item is mapping and (item.type == 'audio' or 'audio' in item) -%}
+<|begin_of_audio|><|pad|><|end_of_audio|><|user|>
+{% elif item is string -%}
+{{- item -}}
+{%- endif -%}
+{%- endfor -%}
+{%- else -%}
+{{- content -}}
+{%- endif -%}
+{%- endmacro -%}
+{#- Render every message as its role tag, a line break, then the trimmed text
+    produced by to_text. Only the system, user and assistant roles are handled;
+    a message with any other role produces no output. Whitespace between
+    consecutive messages is stripped by the surrounding tags, so each message
+    is followed immediately by the next role tag. -#}
+{%- for m in messages -%}
+{%- if m.role == 'system' -%}
+<|system|>
+{{ to_text(m.content) | trim }}
+{%- elif m.role == 'user' -%}
+<|user|>
+{{ to_text(m.content) | trim }}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{{ to_text(m.content) | trim }}
+{%- endif -%}
+{%- endfor -%}
+{#- When add_generation_prompt is truthy, finish with an assistant tag. The
+    closing tag below has no left whitespace-strip marker, so the line break
+    after the assistant tag is kept. -#}
+{%- if add_generation_prompt -%}
+<|assistant|>
+{% endif -%}

processor_config.json ADDED Viewed

+{
+  "audio_bos_token": "<|begin_of_audio|>",
+  "audio_eos_token": "<|end_of_audio|>",
+  "audio_token": "<|pad|>",
+  "feature_extractor": {
+    "chunk_length": 30,
+    "dither": 0.0,
+    "feature_extractor_type": "WhisperFeatureExtractor",
+    "feature_size": 128,
+    "hop_length": 160,
+    "n_fft": 400,
+    "n_samples": 480000,
+    "nb_max_frames": 3000,
+    "padding_side": "right",
+    "padding_value": 0.0,
+    "return_attention_mask": false,
+    "sampling_rate": 16000
+  },
+  "processor_class": "GlmasrProcessor"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

+{
+  "backend": "tokenizers",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": [
+    "<|endoftext|>",
+    "[MASK]",
+    "[gMASK]",
+    "[sMASK]",
+    "<sop>",
+    "<eop>",
+    "<|system|>",
+    "<|user|>",
+    "<|assistant|>",
+    "<|observation|>",
+    "<|begin_of_image|>",
+    "<|end_of_image|>"
+  ],
+  "is_local": false,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 65536,
+  "model_specific_special_tokens": {},
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "processor_class": "GlmasrProcessor",
+  "remove_space": false,
+  "tokenizer_class": "TokenizersBackend"
+}