Upload folder using huggingface_hub

- .gitattributes +1 -0
- README.md +58 -0
- added_tokens.json +26 -0
- chat_template.jinja +54 -0
- config.json +15 -0
- configuration_borealis.py +20 -0
- merges.txt +0 -0
- modeling_borealis.py +265 -0
- preprocessor_config.json +15 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +32 -0
- tokenizer.json +3 -0
- tokenizer_config.json +213 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,58 @@
---
license: apache-2.0
language:
- ru
pipeline_tag: automatic-speech-recognition
---

## Borealis

### Description

**Borealis** is our first ASR model for the Russian language.

### Usage

```python
import librosa
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoFeatureExtractor

model = AutoModelForCausalLM.from_pretrained("Vikhrmodels/Borealis", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("Vikhrmodels/Borealis")
extractor = AutoFeatureExtractor.from_pretrained("Vikhrmodels/Borealis")

generation_params = {
    "max_new_tokens": 350,
    "do_sample": True,
    "top_p": 0.9,
    "top_k": 50,
    "temperature": 0.2,
}

device = "cuda"
model.eval()
model.to(device)

waveform, sr = librosa.load("path/to/your/audio.wav", sr=16_000)

proc = extractor(
    waveform,
    sampling_rate=sr,
    padding="max_length",
    max_length=480_000,
    return_tensors="pt",
    return_attention_mask=True,
)

mel = proc.input_features.squeeze(0).to(device)
att_mask = proc.attention_mask.squeeze(0).to(device)

with torch.inference_mode():
    transcript = model.generate(mel=mel, att_mask=att_mask, **generation_params)

print(transcript)
```
added_tokens.json
ADDED
@@ -0,0 +1,26 @@
{
  "</tool_call>": 151658,
  "<tool_call>": 151657,
  "<|box_end|>": 151649,
  "<|box_start|>": 151648,
  "<|end_of_audio|>": 151666,
  "<|endoftext|>": 151643,
  "<|file_sep|>": 151664,
  "<|fim_middle|>": 151660,
  "<|fim_pad|>": 151662,
  "<|fim_prefix|>": 151659,
  "<|fim_suffix|>": 151661,
  "<|im_end|>": 151645,
  "<|im_start|>": 151644,
  "<|image_pad|>": 151655,
  "<|object_ref_end|>": 151647,
  "<|object_ref_start|>": 151646,
  "<|quad_end|>": 151651,
  "<|quad_start|>": 151650,
  "<|repo_name|>": 151663,
  "<|start_of_audio|>": 151665,
  "<|video_pad|>": 151656,
  "<|vision_end|>": 151653,
  "<|vision_pad|>": 151654,
  "<|vision_start|>": 151652
}
chat_template.jinja
ADDED
@@ -0,0 +1,54 @@
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0]['role'] == 'system' %}
        {{- messages[0]['content'] }}
    {%- else %}
        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
    {%- endif %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0]['role'] == 'system' %}
        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {{- '<|im_start|>' + message.role }}
        {%- if message.content %}
            {{- '\n' + message.content }}
        {%- endif %}
        {%- for tool_call in message.tool_calls %}
            {%- if tool_call.function is defined %}
                {%- set tool_call = tool_call.function %}
            {%- endif %}
            {{- '\n<tool_call>\n{"name": "' }}
            {{- tool_call.name }}
            {{- '", "arguments": ' }}
            {{- tool_call.arguments | tojson }}
            {{- '}\n</tool_call>' }}
        {%- endfor %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
{%- endif %}
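This is the stock Qwen2.5 ChatML template. For reference, a minimal sketch of how `generate()` in `modeling_borealis.py` (below) renders its fixed transcription prompt through it:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Vikhrmodels/Borealis")

# Same messages that BorealisForConditionalGeneration.generate() builds internally;
# the audio embeddings are later spliced between the two audio marker tokens.
messages = [
    {"role": "system", "content": "Вы полезный помощник по автоматическому распознаванию речи. Точно транскрибируйте аудио в текст."},
    {"role": "user", "content": "Транскрибируйте это аудио: <|start_of_audio|><|end_of_audio|>"},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>system\n...<|im_end|>\n<|im_start|>user\n...<|im_end|>\n<|im_start|>assistant\n
```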
config.json
ADDED
@@ -0,0 +1,15 @@
{
  "architectures": [
    "BorealisForConditionalGeneration"
  ],
  "auto_map": {
    "AutoConfig": "configuration_borealis.BorealisConfig",
    "AutoModelForCausalLM": "modeling_borealis.BorealisForConditionalGeneration"
  },
  "downsample_factor": 4,
  "llm_name": "unsloth/Qwen2.5-0.5B-Instruct",
  "model_type": "borealis",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.55.4",
  "whisper_encoder_name": "openai/whisper-large-v3"
}
configuration_borealis.py
ADDED
@@ -0,0 +1,20 @@
from transformers import PretrainedConfig


class BorealisConfig(PretrainedConfig):
    model_type = "borealis"

    def __init__(
        self,
        whisper_encoder_name: str = "openai/whisper-large-v3",
        llm_name: str = "unsloth/Qwen2.5-0.5B-Instruct",
        downsample_factor: int = 4,
        **kwargs,
    ):
        self.whisper_encoder_name = whisper_encoder_name
        self.llm_name = llm_name
        self.downsample_factor = downsample_factor
        super().__init__(**kwargs)


BorealisConfig.register_for_auto_class()
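A quick sketch of the config round-trip (run from a checkout of this repo; the save path is illustrative), using only the standard `PretrainedConfig` API:

```python
from configuration_borealis import BorealisConfig

cfg = BorealisConfig()
print(cfg.model_type)            # borealis
print(cfg.whisper_encoder_name)  # openai/whisper-large-v3
print(cfg.llm_name)              # unsloth/Qwen2.5-0.5B-Instruct

cfg.save_pretrained("./borealis-config")  # writes a config.json like the one above
cfg2 = BorealisConfig.from_pretrained("./borealis-config")
assert cfg2.downsample_factor == 4
```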
merges.txt
ADDED
The diff for this file is too large to render.
modeling_borealis.py
ADDED
@@ -0,0 +1,265 @@
import math
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    PreTrainedModel,
    WhisperFeatureExtractor,
    WhisperModel,
)

from .configuration_borealis import BorealisConfig


# Two-layer projection from stacked Whisper encoder frames into the LLM embedding space.
class AudioLanguageAdapter(nn.Module):
    def __init__(self, hidden_size: int, dim: int) -> None:
        super().__init__()
        self.w_in = nn.Linear(hidden_size, dim, bias=False)
        self.gelu = nn.GELU()
        self.w_out = nn.Linear(dim, dim, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w_out(self.gelu(self.w_in(x)))


class BorealisForConditionalGeneration(PreTrainedModel, PyTorchModelHubMixin):
    config_class = BorealisConfig

    def __init__(self, config: BorealisConfig, language_model=None, tokenizer=None):
        super().__init__(config)
        assert tokenizer is not None, "A tokenizer must be passed to the model"
        self.encoder: WhisperModel = WhisperModel.from_pretrained(
            config.whisper_encoder_name
        ).encoder
        self.encoder.to(torch.bfloat16)
        self.encoder.eval()
        for p in self.encoder.parameters():
            p.requires_grad = False
        self.llm = language_model
        self.tokenizer = tokenizer
        self.llm.resize_token_embeddings(len(tokenizer))
        print("Pad token:", self.llm.config.pad_token_id)
        print("EOS token:", self.llm.config.eos_token_id)
        print("Tokenizer EOS token ID:", tokenizer.eos_token_id)
        print("Tokenizer PAD token ID:", tokenizer.pad_token_id)
        self.downsample_factor = config.downsample_factor
        self.adapter = AudioLanguageAdapter(
            hidden_size=self.encoder.config.d_model * self.downsample_factor,
            dim=self.llm.config.hidden_size,
        )
        self.adapter.to(torch.bfloat16)
        self.bos_id = tokenizer.convert_tokens_to_ids("<|im_start|>")
        self.audio_start_id = tokenizer.convert_tokens_to_ids("<|start_of_audio|>")
        self.audio_end_id = tokenizer.convert_tokens_to_ids("<|end_of_audio|>")

    def _downsample(self, seq: torch.Tensor) -> torch.Tensor:
        # Stack every k consecutive encoder frames into one vector: (T, d) -> (ceil(T/k), d*k).
        k, (T, d) = self.downsample_factor, seq.shape
        target = k * math.ceil(T / k)
        if target != T:
            seq = F.pad(seq, (0, 0, 0, target - T))
        return seq.contiguous().view(target // k, d * k)

    def _tok_embed(self, tok_id: int, batch: int, device) -> torch.Tensor:
        idx = torch.full((batch, 1), tok_id, dtype=torch.long, device=device)
        return self.llm.get_input_embeddings()(idx)

    def forward(
        self,
        mel: torch.Tensor,
        audio_att_mask: torch.Tensor,
        labels: torch.Tensor,
        text_att_mask: torch.Tensor,
    ):
        B, device = mel.size(0), mel.device
        enc_out = self.encoder(
            input_features=mel, attention_mask=None, return_dict=True
        ).last_hidden_state
        audio_embs, audio_mask, max_T = [], [], 0
        for seq in enc_out:
            ds = self._downsample(seq)
            audio_embs.append(ds)
            max_T = max(max_T, ds.size(0))
        for i, ds in enumerate(audio_embs):
            pad = max_T - ds.size(0)
            audio_mask.append(
                torch.cat(
                    [
                        torch.ones(ds.size(0), dtype=torch.long, device=device),
                        torch.zeros(pad, dtype=torch.long, device=device),
                    ]
                )
            )
            if pad:
                audio_embs[i] = F.pad(ds, (0, 0, 0, pad))
        audio_embeddings = torch.stack(audio_embs, 0)
        audio_mask = torch.stack(audio_mask, 0)
        audio_embeddings = self.adapter(audio_embeddings)
        text_embeddings = self.llm.get_input_embeddings()(labels)
        sa_positions = (labels == self.audio_start_id).nonzero(as_tuple=True)
        ea_positions = (labels == self.audio_end_id).nonzero(as_tuple=True)
        inputs_embeds = []
        att_mask = []
        # Splice the adapted audio embeddings between <|start_of_audio|> and <|end_of_audio|>.
        for b in range(B):
            sa_idx = sa_positions[1][sa_positions[0] == b].item()
            ea_idx = ea_positions[1][ea_positions[0] == b].item()
            prefix_emb = text_embeddings[b, : sa_idx + 1]
            postfix_emb = text_embeddings[b, ea_idx:]
            emb = torch.cat([prefix_emb, audio_embeddings[b], postfix_emb], dim=0)
            prefix_mask = text_att_mask[b, : sa_idx + 1]
            postfix_mask = text_att_mask[b, ea_idx:]
            full_mask = torch.cat([prefix_mask, audio_mask[b], postfix_mask], dim=0)
            inputs_embeds.append(emb)
            att_mask.append(full_mask)
        inputs_embeds = torch.nn.utils.rnn.pad_sequence(
            inputs_embeds, batch_first=True, padding_value=0.0
        )
        att_mask = torch.nn.utils.rnn.pad_sequence(
            att_mask, batch_first=True, padding_value=0
        )
        # Supervise only the tokens after "<|im_start|>assistant\n", shifted by the inserted audio span.
        assistant_prompt = self.tokenizer(
            "<|im_start|>assistant\n", add_special_tokens=False
        ).input_ids
        assistant_starts = []
        for b in range(B):
            seq = labels[b]
            for i in range(len(seq) - len(assistant_prompt)):
                if torch.equal(
                    seq[i : i + len(assistant_prompt)],
                    torch.tensor(assistant_prompt, device=device),
                ):
                    assistant_start = i + len(assistant_prompt)
                    break
            else:
                raise ValueError("Assistant prompt not found")
            assistant_starts.append(assistant_start + (ea_idx - sa_idx - 1) + max_T)
        max_len = inputs_embeds.size(1)
        loss_labels = labels.new_full((B, max_len), -100)
        for b in range(B):
            orig_assist_start = assistant_starts[b] - max_T - (ea_idx - sa_idx - 1)
            content_len = len(labels[b]) - orig_assist_start
            loss_labels[b, assistant_starts[b] : assistant_starts[b] + content_len] = (
                labels[b, orig_assist_start:]
            )
        if self.tokenizer.pad_token_id is not None:
            loss_labels[loss_labels == self.tokenizer.pad_token_id] = -100
        out = self.llm(
            inputs_embeds=inputs_embeds,
            attention_mask=att_mask,
            labels=loss_labels,
            return_dict=True,
        )
        return out.loss, out.logits

    @torch.no_grad()
    def generate(
        self,
        mel: torch.Tensor,
        att_mask: torch.Tensor,
        max_new_tokens: int = 512,
        **kwargs,
    ):
        return_tokens = kwargs.pop("return_tokens", False)
        single = mel.dim() == 2
        if single:
            mel, att_mask = mel.unsqueeze(0), att_mask.unsqueeze(0)
        mel = mel.to(torch.bfloat16)
        B, device = mel.size(0), mel.device
        enc_out = self.encoder(
            input_features=mel, attention_mask=None, return_dict=True
        ).last_hidden_state
        audio_embs, audio_mask, max_T = [], [], 0
        for seq in enc_out:
            ds = self._downsample(seq)
            audio_embs.append(ds)
            max_T = max(max_T, ds.size(0))
        for i, ds in enumerate(audio_embs):
            pad = max_T - ds.size(0)
            audio_mask.append(
                torch.cat(
                    [
                        torch.ones(ds.size(0), dtype=torch.long, device=device),
                        torch.zeros(pad, dtype=torch.long, device=device),
                    ]
                )
            )
            if pad:
                audio_embs[i] = F.pad(ds, (0, 0, 0, pad))
        audio_embeddings = torch.stack(audio_embs, 0)
        audio_mask = torch.stack(audio_mask, 0)
        audio_embeddings = self.adapter(audio_embeddings)
        # Fixed transcription prompt; the audio embeddings replace the span between the two markers.
        messages = [
            {
                "role": "system",
                "content": "Вы полезный помощник по автоматическому распознаванию речи. Точно транскрибируйте аудио в текст.",
            },
            {
                "role": "user",
                "content": "Транскрибируйте это аудио: <|start_of_audio|><|end_of_audio|>",
            },
        ]
        chat_text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        model_inputs = self.tokenizer(chat_text, return_tensors="pt").to(device)
        input_ids = model_inputs.input_ids.repeat(B, 1)
        text_att_mask = model_inputs.attention_mask.repeat(B, 1)
        text_embeddings = self.llm.get_input_embeddings()(input_ids)
        sa_idx = (input_ids[0] == self.audio_start_id).nonzero(as_tuple=True)[0].item()
        ea_idx = (input_ids[0] == self.audio_end_id).nonzero(as_tuple=True)[0].item()
        inputs_embeds = []
        full_att_mask = []
        for b in range(B):
            prefix_emb = text_embeddings[b, : sa_idx + 1]
            postfix_emb = text_embeddings[b, ea_idx:]
            emb = torch.cat([prefix_emb, audio_embeddings[b], postfix_emb], dim=0)
            prefix_mask = text_att_mask[b, : sa_idx + 1]
            postfix_mask = text_att_mask[b, ea_idx:]
            mask = torch.cat([prefix_mask, audio_mask[b], postfix_mask], dim=0)
            inputs_embeds.append(emb)
            full_att_mask.append(mask)
        inputs_embeds = torch.nn.utils.rnn.pad_sequence(
            inputs_embeds, batch_first=True, padding_value=0.0
        )
        att_mask = torch.nn.utils.rnn.pad_sequence(
            full_att_mask, batch_first=True, padding_value=0
        )
        gen_ids = self.llm.generate(
            inputs_embeds=inputs_embeds,
            attention_mask=att_mask,
            max_new_tokens=max_new_tokens,
            eos_token_id=self.tokenizer.eos_token_id,
            **kwargs,
        )
        if return_tokens:
            return gen_ids[0] if single else gen_ids
        else:
            txt = self.tokenizer.batch_decode(gen_ids, skip_special_tokens=True)
            return txt[0] if single else txt

    def save_pretrained(self, save_directory, **kwargs):
        os.makedirs(save_directory, exist_ok=True)
        self.config.save_pretrained(save_directory)
        state_dict = self.state_dict()
        torch.save(state_dict, os.path.join(save_directory, "pytorch_model.bin"))
        self.tokenizer.save_pretrained(save_directory)
        extractor = WhisperFeatureExtractor.from_pretrained(
            self.config.whisper_encoder_name
        )
        extractor.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        config = BorealisConfig.from_pretrained(pretrained_model_name_or_path)
        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
        language_model = AutoModelForCausalLM.from_pretrained(config.llm_name)
        model = cls(config, language_model=language_model, tokenizer=tokenizer)

        state_dict_path = hf_hub_download(
            repo_id=pretrained_model_name_or_path, filename="pytorch_model.bin"
        )

        state_dict = torch.load(state_dict_path, map_location="cpu")
        model.load_state_dict(state_dict)
        return model


BorealisForConditionalGeneration.register_for_auto_class("AutoModelForCausalLM")
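To make the adapter dimensions concrete, a small shape-level sketch of what `_downsample` plus `AudioLanguageAdapter` do under the default config (the 1500-frame, d_model=1280 encoder output is what whisper-large-v3 produces for a 30 s clip; 896 as Qwen2.5-0.5B's hidden size is an assumption about the LLM config):

```python
import torch

# whisper-large-v3 encoder output for one 30 s clip: 1500 frames of d_model=1280
seq = torch.randn(1500, 1280)

k = 4  # downsample_factor
ds = seq.contiguous().view(1500 // k, 1280 * k)  # stack k consecutive frames
print(ds.shape)  # torch.Size([375, 5120])

# The adapter then projects 5120 -> the LLM hidden size (assumed 896 for Qwen2.5-0.5B),
# so audio costs 12.5 LLM positions per second instead of Whisper's 50 frames.
```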
preprocessor_config.json
ADDED
@@ -0,0 +1,15 @@
{
  "chunk_length": 30,
  "dither": 0.0,
  "feature_extractor_type": "WhisperFeatureExtractor",
  "feature_size": 128,
  "hop_length": 160,
  "n_fft": 400,
  "n_samples": 480000,
  "nb_max_frames": 3000,
  "padding_side": "right",
  "padding_value": 0.0,
  "processor_class": "WhisperProcessor",
  "return_attention_mask": false,
  "sampling_rate": 16000
}
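These are stock whisper-large-v3 extractor settings: 30 s × 16000 Hz = 480000 samples, and 480000 / 160 hop = 3000 mel frames of 128 bins. A quick sanity check of that arithmetic (a sketch; silence stands in for real audio):

```python
import numpy as np
from transformers import WhisperFeatureExtractor

extractor = WhisperFeatureExtractor.from_pretrained("Vikhrmodels/Borealis")
audio = np.zeros(16_000 * 5, dtype=np.float32)  # 5 s of silence at 16 kHz

# padding defaults to "max_length", so features are always padded out to 30 s
feats = extractor(audio, sampling_rate=16_000, return_tensors="pt")
print(feats.input_features.shape)  # torch.Size([1, 128, 3000])
```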
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:babfecb8b4346c60c9b3fe01e38186bfec189db26626c37d169c008b57fba8ff
size 2272601487
special_tokens_map.json
ADDED
@@ -0,0 +1,32 @@
{
  "additional_special_tokens": [
    {
      "content": "<|start_of_audio|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "content": "<|end_of_audio|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    }
  ],
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|vision_pad|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:55c7fad3b807310f01cead0edd8fa225070d199053eb0649e31f58a1caf09aa2
size 11422284
tokenizer_config.json
ADDED
@@ -0,0 +1,213 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151646": {
      "content": "<|object_ref_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151647": {
      "content": "<|object_ref_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151648": {
      "content": "<|box_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151649": {
      "content": "<|box_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151650": {
      "content": "<|quad_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151651": {
      "content": "<|quad_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151652": {
      "content": "<|vision_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151653": {
      "content": "<|vision_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151654": {
      "content": "<|vision_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151655": {
      "content": "<|image_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151656": {
      "content": "<|video_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151657": {
      "content": "<tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151658": {
      "content": "</tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151659": {
      "content": "<|fim_prefix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151660": {
      "content": "<|fim_middle|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151661": {
      "content": "<|fim_suffix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151662": {
      "content": "<|fim_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151663": {
      "content": "<|repo_name|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151664": {
      "content": "<|file_sep|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151665": {
      "content": "<|start_of_audio|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151666": {
      "content": "<|end_of_audio|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|start_of_audio|>",
    "<|end_of_audio|>"
  ],
  "bos_token": null,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 32768,
  "pad_token": "<|vision_pad|>",
  "padding_side": "left",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
vocab.json
ADDED
The diff for this file is too large to render.