Upload folder using huggingface_hub
- .gitattributes +1 -0
- config.json +36 -0
- configuration_residualnet.py +6 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- modeling_residualnet.py +395 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +239 -0
- vocab.json +0 -0
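Per the commit title, the folder was pushed with the huggingface_hub client; a minimal sketch of such an upload (the repo id and local path are placeholders):

from huggingface_hub import upload_folder

# Uploads the directory in a single commit; files matched by the LFS rules in
# .gitattributes (model.safetensors, tokenizer.json) are stored as LFS objects.
upload_folder(repo_id="user/residual-tiny", folder_path="./residual-tiny")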
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json
ADDED
@@ -0,0 +1,36 @@
{
  "architectures": [
    "ResidualNetForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_residualnet.ResidualNetConfig",
    "AutoModel": "modeling_residualnet.ResidualNetModel",
    "AutoModelForCausalLM": "modeling_residualnet.ResidualNetForCausalLM"
  },
  "attention_dropout": 0.0,
  "bos_token_id": null,
  "embd_pdrop": 0.0,
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 128,
  "initializer_range": 0.02,
  "intermediate_size": 64,
  "max_position_embeddings": 1024,
  "model_type": "ResidualNetConfig",
  "name": "residual-tiny",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "num_key_value_heads": 4,
  "original_max_position_embeddings": 1024,
  "pad_token_id": 151645,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.48.2",
  "use_cache": true,
  "vocab_size": 151669
}
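The auto_map block routes the Auto classes to the custom code in this upload, so the checkpoint loads with trust_remote_code=True; a minimal sketch with a placeholder repo id. Note that modeling_residualnet.py imports a local models.phi3 package that is not part of this upload, so loading additionally requires those modules to be available.

from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("user/residual-tiny", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("user/residual-tiny", trust_remote_code=True)
print(config.model_type)  # "ResidualNetConfig"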
configuration_residualnet.py
ADDED
@@ -0,0 +1,6 @@

from transformers.models.phi3.configuration_phi3 import Phi3Config
class ResidualNetConfig(Phi3Config):
    model_type = "ResidualNetConfig"
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
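For purely local use (without the Hub's auto_map), the same classes can be registered with the Auto factories once; a minimal sketch assuming both .py files are on the import path:

from transformers import AutoConfig, AutoModelForCausalLM
from configuration_residualnet import ResidualNetConfig
from modeling_residualnet import ResidualNetForCausalLM

# The first argument must match "model_type" in config.json.
AutoConfig.register("ResidualNetConfig", ResidualNetConfig)
AutoModelForCausalLM.register(ResidualNetConfig, ResidualNetForCausalLM)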
merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a6dca59cfa6bb47533b74fe5b471785c6dd689e339e48a9259955bec389e52fc
size 79368760
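This is a Git LFS pointer rather than the weights themselves. The 79,368,760-byte float32 payload is consistent with config.json above, where the embedding matrix dominates; a rough back-of-the-envelope check:

vocab_size, hidden_size = 151669, 128       # from config.json
embed_bytes = vocab_size * hidden_size * 4  # float32: 77,654,528 bytes
print(embed_bytes / 1e6)                    # ~77.7 MB; the remaining ~1.7 MB
                                            # is the four tiny transformer layers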
modeling_residualnet.py
ADDED
@@ -0,0 +1,395 @@
"""
Architecture: the first half of the layers repeats "difference -> Attn -> Dense",
and the second half repeats "Dense upscaling (= length +1) -> Attn -> Dense" until
the original seq_len is restored.
"""

from typing import Optional, Tuple, List
import torch
from torch import nn

from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.generation.utils import GenerationMixin

# from transformers.models.phi3.configuration_phi3 import Phi3Config
# from transformers.models.phi3.modeling_phi3 import (
#     Phi3PreTrainedModel,
#     Phi3RotaryEmbedding,
#     Phi3RMSNorm,
#     Phi3Attention,
#     # Phi3SdpaAttention,  # the default SDPA attention
#     Phi3MLP,
# )
from models.phi3_config import Phi3Config
from models.phi3 import (
    Phi3PreTrainedModel,
    Phi3RMSNorm,
    Phi3MLP,
    # Phi3SdpaAttention,
    Phi3Attention,
    Phi3RotaryEmbedding,
)

class ResidualNetConfig(Phi3Config):
    model_type = "ResidualNetConfig"
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

# ---------- Preprocessors for length conversion ----------

class DiffPreprocessor(nn.Module):
    """First-order difference: (B, L, H) -> (B, L-1, H), with an AND-reduction of the 2D mask."""
    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask_2d: Optional[torch.Tensor],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        # hidden_states: (B, L, H)
        x1 = hidden_states[:, 1:, :]
        x0 = hidden_states[:, :-1, :]
        diff = x1 - x0  # (B, L-1, H)

        if attention_mask_2d is not None:
            m = (attention_mask_2d[:, 1:].bool() & attention_mask_2d[:, :-1].bool()).to(attention_mask_2d.dtype)
        else:
            m = None
        return diff, m


class IntegratePreprocessor(nn.Module):
    """
    Learnable "integration": (B, m, H) -> (B, m+1, H)
    1) seed y0 = MLP(mean_pool(z))
    2) y = cumsum([y0, z], dim=1)
    """
    def __init__(self, hidden_size: int):
        super().__init__()
        self.seed_mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size, bias=True),
            nn.SiLU(),
            nn.Linear(hidden_size, hidden_size, bias=True),
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask_2d: Optional[torch.Tensor],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        # hidden_states: (B, m, H)
        if attention_mask_2d is not None:
            denom = attention_mask_2d.sum(dim=1, keepdim=True).clamp_min(1)
            pooled = (hidden_states * attention_mask_2d.unsqueeze(-1)).sum(dim=1) / denom  # (B, H)
            batch_valid = (attention_mask_2d.sum(dim=1) > 0).to(attention_mask_2d.dtype)  # (B,)
        else:
            pooled = hidden_states.mean(dim=1)
            batch_valid = None

        y0 = self.seed_mlp(pooled).unsqueeze(1)  # (B,1,H)
        y = torch.cumsum(torch.cat([y0, hidden_states], dim=1), dim=1)  # (B, m+1, H)

        if attention_mask_2d is not None:
            new_first = batch_valid.unsqueeze(1)  # (B,1)
            mask = torch.cat([new_first, attention_mask_2d], dim=1)
        else:
            mask = None
        return y, mask


# ---------- Layer blocks (built from Phi3 components) ----------

class ResidualDiffLayer(nn.Module):
    """
    (difference: L -> L-1) -> Attn -> MLP
    - RoPE is applied inside the attention, as in Phi-3
    - position_ids are regenerated as 0..len-1 in every layer
    """
    def __init__(self, config: ResidualNetConfig, layer_idx: int, rotary_emb: Phi3RotaryEmbedding):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        self.input_norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.pre = DiffPreprocessor()
        # self.attn = Phi3SdpaAttention(config, layer_idx=layer_idx)
        self.attn = Phi3Attention(config, layer_idx=layer_idx)
        self.dropout_attn = nn.Dropout(config.resid_pdrop)
        self.post_norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.mlp = Phi3MLP(config)
        self.dropout_mlp = nn.Dropout(config.resid_pdrop)
        self.rotary_emb = rotary_emb  # shared RoPE instance

    def _to_4d_mask(
        self, mask2d: Optional[torch.Tensor], bsz: int, seqlen: int, hidden_states: torch.Tensor
    ) -> Optional[torch.Tensor]:
        if mask2d is None:
            return None
        return _prepare_4d_causal_attention_mask(
            mask2d, (bsz, seqlen), hidden_states, past_key_values_length=0, sliding_window=self.config.sliding_window
        )

    def forward(
        self,
        hidden_states: torch.Tensor,                # (B, L, H)
        attention_mask_2d: Optional[torch.Tensor],  # (B, L)
        position_ids: Optional[torch.LongTensor],   # (B, L)
        output_attentions: bool = False,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
        x = self.input_norm(hidden_states)
        # L -> L-1
        x, mask2d = self.pre(x, attention_mask_2d)
        bsz, seqlen, _ = x.shape

        # regenerate position_ids (0..seqlen-1)
        device = x.device
        pos_ids = torch.arange(seqlen, device=device).unsqueeze(0).expand(bsz, -1)
        position_embeddings = self.rotary_emb(hidden_states, pos_ids)

        attn_mask_4d = self._to_4d_mask(mask2d, bsz, seqlen, x)

        attn_out, attn_weights = self.attn(
            hidden_states=x,
            attention_mask=attn_mask_4d,
            position_ids=pos_ids,
            position_embeddings=position_embeddings,
            past_key_value=None,
            output_attentions=output_attentions,
            use_cache=False,
        )
        x = x + self.dropout_attn(attn_out)
        h = self.post_norm(x)
        h = self.mlp(h)
        x = x + self.dropout_mlp(h)
        return x, mask2d, attn_weights if output_attentions else None


class IntegrateUpscaleLayer(nn.Module):
    """
    (integration: L -> L+1) -> Attn -> MLP
    """
    def __init__(self, config: ResidualNetConfig, layer_idx: int, rotary_emb: Phi3RotaryEmbedding):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        self.input_norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.pre = IntegratePreprocessor(config.hidden_size)
        # self.attn = Phi3SdpaAttention(config, layer_idx=layer_idx)
        self.attn = Phi3Attention(config, layer_idx=layer_idx)
        self.dropout_attn = nn.Dropout(config.resid_pdrop)
        self.post_norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.mlp = Phi3MLP(config)
        self.dropout_mlp = nn.Dropout(config.resid_pdrop)
        self.rotary_emb = rotary_emb

    def _to_4d_mask(
        self, mask2d: Optional[torch.Tensor], bsz: int, seqlen: int, hidden_states: torch.Tensor
    ) -> Optional[torch.Tensor]:
        if mask2d is None:
            return None
        return _prepare_4d_causal_attention_mask(
            mask2d, (bsz, seqlen), hidden_states, past_key_values_length=0, sliding_window=self.config.sliding_window
        )

    def forward(
        self,
        hidden_states: torch.Tensor,                # (B, L, H)
        attention_mask_2d: Optional[torch.Tensor],  # (B, L)
        position_ids: Optional[torch.LongTensor],   # (B, L)
        output_attentions: bool = False,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
        x = self.input_norm(hidden_states)
        # L -> L+1
        x, mask2d = self.pre(x, attention_mask_2d)
        bsz, seqlen, _ = x.shape

        # regenerate position_ids (0..seqlen-1)
        device = x.device
        pos_ids = torch.arange(seqlen, device=device).unsqueeze(0).expand(bsz, -1)
        position_embeddings = self.rotary_emb(hidden_states, pos_ids)

        attn_mask_4d = self._to_4d_mask(mask2d, bsz, seqlen, x)

        attn_out, attn_weights = self.attn(
            hidden_states=x,
            attention_mask=attn_mask_4d,
            position_ids=pos_ids,
            position_embeddings=position_embeddings,
            past_key_value=None,
            output_attentions=output_attentions,
            use_cache=False,
        )
        x = x + self.dropout_attn(attn_out)
        h = self.post_norm(x)
        h = self.mlp(h)
        x = x + self.dropout_mlp(h)
        return x, mask2d, attn_weights if output_attentions else None


# ---------- Model body (inherits Phi3PreTrainedModel) ----------

class ResidualNetModel(Phi3PreTrainedModel):
    """
    First half: ResidualDiffLayer × (N/2) shrinks the sequence length
    Second half: IntegrateUpscaleLayer × (N/2) restores it
    """
    def __init__(self, config: ResidualNetConfig):
        super().__init__(config)
        assert config.num_hidden_layers % 2 == 0, "num_hidden_layers must be even."

        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.rotary_emb = Phi3RotaryEmbedding(config=config)
        self.gradient_checkpointing = False

        half = config.num_hidden_layers // 2
        # first half (down)
        self.down_layers = nn.ModuleList(
            [ResidualDiffLayer(config, layer_idx=i, rotary_emb=self.rotary_emb) for i in range(half)]
        )
        # second half (up)
        self.up_layers = nn.ModuleList(
            [IntegrateUpscaleLayer(config, layer_idx=half + i, rotary_emb=self.rotary_emb) for i in range(half)]
        )

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,  # (B, L) in {0,1}
        position_ids: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        use_cache: Optional[bool] = None,  # unsupported (forced to False)
    ):
        output_attentions = output_attentions if output_attentions is not None else False
        output_hidden_states = output_hidden_states if output_hidden_states is not None else False
        return_dict = True if return_dict is None else return_dict

        if input_ids is None and inputs_embeds is None:
            raise ValueError("You must specify either input_ids or inputs_embeds.")

        if inputs_embeds is None:
            hidden_states = self.embed_tokens(input_ids)  # (B, L, H)
        else:
            hidden_states = inputs_embeds

        mask2d = attention_mask
        bsz, orig_len, _ = hidden_states.shape

        all_hidden_states: List[torch.Tensor] = [] if output_hidden_states else None
        all_attns: List[torch.Tensor] = [] if output_attentions else None

        # ---- first half: shrink via differences ----
        for layer in self.down_layers:
            if output_hidden_states:
                all_hidden_states.append(hidden_states)
            hidden_states, mask2d, attn = layer(
                hidden_states, mask2d, position_ids, output_attentions=output_attentions
            )
            if output_attentions:
                all_attns.append(attn)

        # ---- second half: restore via integration ----
        for layer in self.up_layers:
            if output_hidden_states:
                all_hidden_states.append(hidden_states)
            hidden_states, mask2d, attn = layer(
                hidden_states, mask2d, position_ids, output_attentions=output_attentions
            )
            if output_attentions:
                all_attns.append(attn)

        # final length consistency check (just in case)
        if hidden_states.size(1) != orig_len:
            raise RuntimeError(f"seq_len was not restored: got {hidden_states.size(1)} vs {orig_len}")

        hidden_states = self.norm(hidden_states)

        if not return_dict:
            out = (hidden_states,)
            if output_hidden_states:
                out = out + (all_hidden_states,)
            if output_attentions:
                out = out + (all_attns,)
            return out

        return {
            "last_hidden_state": hidden_states,
            "hidden_states": all_hidden_states,
            "attentions": all_attns,
        }


# ---------- CausalLM head (Phi3PreTrainedModel + GenerationMixin) ----------

class ResidualNetForCausalLM(Phi3PreTrainedModel, GenerationMixin):
    _tied_weights_keys = ["lm_head.weight"]
    _tp_plan = {"lm_head": "colwise_rep"}
    _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}

    def __init__(self, config: ResidualNetConfig):
        super().__init__(config)
        self.model = ResidualNetModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # weight tying
        self.lm_head.weight = self.model.embed_tokens.weight

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        use_cache: Optional[bool] = None,  # unsupported
        past_key_values: Optional[List[torch.Tensor]] = None,  # unsupported
    ) -> CausalLMOutputWithPast:
        return_dict = True if return_dict is None else return_dict

        model_out = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=True,
            use_cache=False,
        )
        hidden_states = model_out["last_hidden_state"]  # (B, L, H)
        logits = self.lm_head(hidden_states).float()

        loss = None
        if labels is not None:
            # causal language-modeling loss
            shift_logits = logits[:, :-1, :].contiguous()
            shift_labels = labels[:, 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, self.vocab_size), shift_labels.view(-1))

        if not return_dict:
            return (logits, loss)

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=None,  # unsupported
            hidden_states=model_out["hidden_states"],
            attentions=model_out["attentions"],
        )

    @property
    def base_model(self):
        return self.model
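Two properties of this file are easy to sanity-check in isolation: the down path's first-order difference is exactly invertible by a cumulative sum once a seed element is supplied (IntegratePreprocessor learns that seed rather than receiving it), and a forward pass returns logits at the original sequence length. A minimal sketch, assuming the vendored models.phi3 package is importable and matches the call signatures above; the small vocab_size and token ids are placeholders, not the uploaded config:

import torch
from modeling_residualnet import ResidualNetConfig, ResidualNetForCausalLM

# 1) Given the true first element as seed, cumsum inverts the first-order
#    difference that DiffPreprocessor computes.
x = torch.randn(2, 8, 4)                       # (B, L, H)
diff = x[:, 1:, :] - x[:, :-1, :]              # (B, L-1, H)
restored = torch.cumsum(torch.cat([x[:, :1, :], diff], dim=1), dim=1)
assert torch.allclose(restored, x, atol=1e-5)

# 2) The model restores seq_len: each of the 2 down layers shrinks L by 1 and
#    each of the 2 up layers grows it by 1 (inputs need L > num_hidden_layers/2).
config = ResidualNetConfig(
    vocab_size=1000, hidden_size=128, intermediate_size=64,
    num_hidden_layers=4, num_attention_heads=4, num_key_value_heads=4,
    max_position_embeddings=1024, pad_token_id=0, eos_token_id=1, bos_token_id=None,
)
model = ResidualNetForCausalLM(config).eval()
input_ids = torch.randint(0, 1000, (2, 16))
out = model(input_ids=input_ids, attention_mask=torch.ones(2, 16, dtype=torch.long))
print(out.logits.shape)  # torch.Size([2, 16, 1000])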
special_tokens_map.json
ADDED
@@ -0,0 +1,31 @@
{
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>",
    "<|object_ref_start|>",
    "<|object_ref_end|>",
    "<|box_start|>",
    "<|box_end|>",
    "<|quad_start|>",
    "<|quad_end|>",
    "<|vision_start|>",
    "<|vision_end|>",
    "<|vision_pad|>",
    "<|image_pad|>",
    "<|video_pad|>"
  ],
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
size 11422654
tokenizer_config.json
ADDED
@@ -0,0 +1,239 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151646": {
      "content": "<|object_ref_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151647": {
      "content": "<|object_ref_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151648": {
      "content": "<|box_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151649": {
      "content": "<|box_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151650": {
      "content": "<|quad_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151651": {
      "content": "<|quad_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151652": {
      "content": "<|vision_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151653": {
      "content": "<|vision_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151654": {
      "content": "<|vision_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151655": {
      "content": "<|image_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151656": {
      "content": "<|video_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151657": {
      "content": "<tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151658": {
      "content": "</tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151659": {
      "content": "<|fim_prefix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151660": {
      "content": "<|fim_middle|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151661": {
      "content": "<|fim_suffix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151662": {
      "content": "<|fim_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151663": {
      "content": "<|repo_name|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151664": {
      "content": "<|file_sep|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151665": {
      "content": "<tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151666": {
      "content": "</tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151667": {
      "content": "<think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151668": {
      "content": "</think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>",
    "<|object_ref_start|>",
    "<|object_ref_end|>",
    "<|box_start|>",
    "<|box_end|>",
    "<|quad_start|>",
    "<|quad_end|>",
    "<|vision_start|>",
    "<|vision_end|>",
    "<|vision_pad|>",
    "<|image_pad|>",
    "<|video_pad|>"
  ],
  "bos_token": null,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 131072,
  "pad_token": "<|endoftext|>",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
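A quick consistency check of the tokenizer files against config.json, reusing the placeholder repo id from above. Note that the tokenizer declares <|endoftext|> (id 151643) as its pad token, while config.json sets pad_token_id to 151645 (<|im_end|>).

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("user/residual-tiny")
print(tok.eos_token, tok.pad_token)             # <|im_end|> <|endoftext|>
print(tok.convert_tokens_to_ids("<|im_end|>"))  # 151645, matching eos_token_id in config.json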
vocab.json
ADDED
The diff for this file is too large to render. See raw diff.