anhphamduy committed
Commit 50ee3c7 (verified) · 1 Parent(s): 6c946c8

Upload folder using huggingface_hub
added_tokens.json CHANGED
@@ -2,7 +2,6 @@
   "<|assistant|>": 32001,
   "<|endoftext|>": 32000,
   "<|end|>": 32007,
-  "<|eot_id|>": 32011,
   "<|placeholder1|>": 32002,
   "<|placeholder2|>": 32003,
   "<|placeholder3|>": 32004,
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
+  "_name_or_path": "microsoft/Phi-3.5-mini-instruct",
   "architectures": [
     "Phi3ForCausalLM"
   ],
@@ -8,7 +8,7 @@
   "auto_map": {
     "AutoConfig": "configuration_phi3.Phi3Config",
     "AutoModel": "modeling_phi3.Phi3ForCausalLM",
-    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
+    "AutoModelForCausalLM": "microsoft/Phi-3.5-mini-instruct--modeling_phi3.Phi3ForCausalLM"
   },
   "bos_token_id": 1,
   "embd_pdrop": 0.0,
@@ -17,7 +17,7 @@
   "hidden_size": 3072,
   "initializer_range": 0.02,
   "intermediate_size": 8192,
-  "max_position_embeddings": 4096,
+  "max_position_embeddings": 131072,
   "model_type": "phi3",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
@@ -26,9 +26,111 @@
   "pad_token_id": 32000,
   "resid_pdrop": 0.0,
   "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
+  "rope_scaling": {
+    "long_factor": [
+      1.0800000429153442,
+      1.1100000143051147,
+      1.1399999856948853,
+      1.340000033378601,
+      1.5899999141693115,
+      1.600000023841858,
+      1.6200000047683716,
+      2.620000123977661,
+      3.2300000190734863,
+      3.2300000190734863,
+      4.789999961853027,
+      7.400000095367432,
+      7.700000286102295,
+      9.09000015258789,
+      12.199999809265137,
+      17.670000076293945,
+      24.46000099182129,
+      28.57000160217285,
+      30.420001983642578,
+      30.840002059936523,
+      32.590003967285156,
+      32.93000411987305,
+      42.320003509521484,
+      44.96000289916992,
+      50.340003967285156,
+      50.45000457763672,
+      57.55000305175781,
+      57.93000411987305,
+      58.21000289916992,
+      60.1400032043457,
+      62.61000442504883,
+      62.62000274658203,
+      62.71000289916992,
+      63.1400032043457,
+      63.1400032043457,
+      63.77000427246094,
+      63.93000411987305,
+      63.96000289916992,
+      63.970001220703125,
+      64.02999877929688,
+      64.06999969482422,
+      64.08000183105469,
+      64.12000274658203,
+      64.41000366210938,
+      64.4800033569336,
+      64.51000213623047,
+      64.52999877929688,
+      64.83999633789062
+    ],
+    "short_factor": [
+      1.0,
+      1.0199999809265137,
+      1.0299999713897705,
+      1.0299999713897705,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0699999332427979,
+      1.0999999046325684,
+      1.1099998950958252,
+      1.1599998474121094,
+      1.1599998474121094,
+      1.1699998378753662,
+      1.2899998426437378,
+      1.339999794960022,
+      1.679999828338623,
+      1.7899998426437378,
+      1.8199998140335083,
+      1.8499997854232788,
+      1.8799997568130493,
+      1.9099997282028198,
+      1.9399996995925903,
+      1.9899996519088745,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0799996852874756,
+      2.0899996757507324,
+      2.189999580383301,
+      2.2199995517730713,
+      2.5899994373321533,
+      2.729999542236328,
+      2.749999523162842,
+      2.8399994373321533
+    ],
+    "type": "longrope"
+  },
   "rope_theta": 10000.0,
-  "sliding_window": 2047,
+  "sliding_window": 262144,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.45.0.dev0",
generation_config.json CHANGED
@@ -2,9 +2,9 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": [
-    32000,
+    32007,
     32001,
-    32007
+    32000
   ],
   "pad_token_id": 32000,
   "transformers_version": "4.45.0.dev0"
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4983793de0ac96315a6f9f7a2bd18a1bebe8938e2af6c164943bbbe94a6e959
+oid sha256:567b7723ea6a425b4e8a511facb5ad710a0b81760fb49dc4d8c7731de2e97e66
 size 1958700256
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e5aba5c0bf7c7cd1a589691478ad19b36b24b150093dd43c920b3825a725063
+oid sha256:36f4e8677d7423d9557ab59d0d3eec5abd0e0349189836398cb753104d0aaf52
 size 1937885224
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29ca0ea2287b7451cac4dd0c3fbd9ae44fd6e428ecad0d6fc3433eaaa9c5aeb3
+oid sha256:f485a2b00a23f8f34f33ce2de5a92148a5b58af3040aa2d7ec33c5429fa4d598
 size 1981925424
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5204ae411c6d1f70937f0bcbb593616ba4404bbe551544009b8085f2d92e635
+oid sha256:52d3e202a2f8ced0888a93ea2d1643cc00b7f45c8507b6d67c36811edb2df55b
 size 1763670920
modeling_phi3.py CHANGED
@@ -149,7 +149,7 @@ class Phi3LongRoPEScaledRotaryEmbedding(Phi3RotaryEmbedding):
 
     @torch.no_grad()
     def forward(self, x, position_ids, seq_len=None):
-        seq_len = torch.max(position_ids) + 1
+        seq_len = seq_len or torch.max(position_ids) + 1
         if seq_len > self.original_max_position_embeddings:
             ext_factors = torch.tensor(self.long_factor, dtype=torch.float32, device=x.device)
         else:
@@ -451,7 +451,7 @@ class Phi3FlashAttention2(Phi3Attention):
             kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
 
         # Because the input can be padded, the absolute sequence length depends on the max position id.
-        rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item()) + 1
+        rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item() + 1)
         cos, sin = self.rotary_emb(value_states, position_ids, seq_len=rotary_seq_len)
 
         query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
@@ -1285,6 +1285,13 @@ class Phi3ForCausalLM(Phi3PreTrainedModel):
     def prepare_inputs_for_generation(
         self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
     ):
+        # When the first time input length reached long and short factor switching point, enforce re-compute cache
+        # It will cause downside of slower at this single token position, however, better than current failure.
+        if past_key_values and self.config.rope_scaling and input_ids.shape[1] >= self.config.original_max_position_embeddings + 1:
+            past_length = past_key_values.seen_tokens if isinstance(past_key_values, Cache) else past_key_values[0][0].shape[2]
+            if past_length <= self.config.original_max_position_embeddings:
+                past_key_values = None
+
         if past_key_values is not None:
             if isinstance(past_key_values, Cache):
                 cache_length = past_key_values.get_seq_length()
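Taken together, the three modeling changes target the switch between short_factor and long_factor: forward now honors an explicitly passed seq_len instead of always recomputing it, the FlashAttention path no longer overestimates the sequence length by one, and prepare_inputs_for_generation drops the KV cache the first time the input crosses original_max_position_embeddings so cached keys are re-embedded with long_factor. A toy illustration of the off-by-one being fixed (numbers are made up):

# Toy numbers only: kv_seq_len is a length, the position id is a 0-based index.
kv_seq_len = 4096
last_position_id = 4095

old_rotary_seq_len = max(kv_seq_len, last_position_id) + 1  # 4097: one past the true length
new_rotary_seq_len = max(kv_seq_len, last_position_id + 1)  # 4096: the true length
print(old_rotary_seq_len, new_rotary_seq_len)

With the old expression the rotary embedding could be asked for one position more than actually exists, which could flip it onto the long_factor branch a token early relative to the cached keys.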
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|eot_id|>",
+    "content": "<|end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -98,7 +98,7 @@
       "content": "<|end|>",
       "single_word": false,
       "lstrip": false,
-      "rstrip": true,
+      "rstrip": false,
       "normalized": false,
       "special": true
     },
@@ -128,15 +128,6 @@
       "rstrip": true,
       "normalized": false,
       "special": true
-    },
-    {
-      "id": 32011,
-      "content": "<|eot_id|>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
     }
   ],
   "normalizer": {
tokenizer_config.json CHANGED
@@ -87,7 +87,7 @@
       "content": "<|end|>",
       "lstrip": false,
       "normalized": false,
-      "rstrip": true,
+      "rstrip": false,
       "single_word": false,
       "special": true
     },
@@ -114,22 +114,14 @@
       "rstrip": true,
       "single_word": false,
       "special": true
-    },
-    "32011": {
-      "content": "<|eot_id|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<s>",
-  "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+  "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|system|>\n' + system_message + '<|end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + content + '<|end|>\n<|assistant|>\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end|>' + '\n' }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|eot_id|>",
+  "eos_token": "<|end|>",
   "legacy": false,
-  "model_max_length": 4096,
+  "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "padding_side": "left",
   "sp_model_kwargs": {},