anhphamduy committed
Commit 50ee3c7 (verified) · 1 Parent(s): 6c946c8

Upload folder using huggingface_hub
added_tokens.json CHANGED
@@ -2,7 +2,6 @@
   "<|assistant|>": 32001,
   "<|endoftext|>": 32000,
   "<|end|>": 32007,
-  "<|eot_id|>": 32011,
   "<|placeholder1|>": 32002,
   "<|placeholder2|>": 32003,
   "<|placeholder3|>": 32004,
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "microsoft/Phi-3-mini-4k-instruct",
+  "_name_or_path": "microsoft/Phi-3.5-mini-instruct",
   "architectures": [
     "Phi3ForCausalLM"
   ],
@@ -8,7 +8,7 @@
   "auto_map": {
     "AutoConfig": "configuration_phi3.Phi3Config",
     "AutoModel": "modeling_phi3.Phi3ForCausalLM",
-    "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
+    "AutoModelForCausalLM": "microsoft/Phi-3.5-mini-instruct--modeling_phi3.Phi3ForCausalLM"
   },
   "bos_token_id": 1,
   "embd_pdrop": 0.0,
@@ -17,7 +17,7 @@
   "hidden_size": 3072,
   "initializer_range": 0.02,
   "intermediate_size": 8192,
-  "max_position_embeddings": 4096,
+  "max_position_embeddings": 131072,
   "model_type": "phi3",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
@@ -26,9 +26,111 @@
   "pad_token_id": 32000,
   "resid_pdrop": 0.0,
   "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
+  "rope_scaling": {
+    "long_factor": [
+      1.0800000429153442,
+      1.1100000143051147,
+      1.1399999856948853,
+      1.340000033378601,
+      1.5899999141693115,
+      1.600000023841858,
+      1.6200000047683716,
+      2.620000123977661,
+      3.2300000190734863,
+      3.2300000190734863,
+      4.789999961853027,
+      7.400000095367432,
+      7.700000286102295,
+      9.09000015258789,
+      12.199999809265137,
+      17.670000076293945,
+      24.46000099182129,
+      28.57000160217285,
+      30.420001983642578,
+      30.840002059936523,
+      32.590003967285156,
+      32.93000411987305,
+      42.320003509521484,
+      44.96000289916992,
+      50.340003967285156,
+      50.45000457763672,
+      57.55000305175781,
+      57.93000411987305,
+      58.21000289916992,
+      60.1400032043457,
+      62.61000442504883,
+      62.62000274658203,
+      62.71000289916992,
+      63.1400032043457,
+      63.1400032043457,
+      63.77000427246094,
+      63.93000411987305,
+      63.96000289916992,
+      63.970001220703125,
+      64.02999877929688,
+      64.06999969482422,
+      64.08000183105469,
+      64.12000274658203,
+      64.41000366210938,
+      64.4800033569336,
+      64.51000213623047,
+      64.52999877929688,
+      64.83999633789062
+    ],
+    "short_factor": [
+      1.0,
+      1.0199999809265137,
+      1.0299999713897705,
+      1.0299999713897705,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0499999523162842,
+      1.0699999332427979,
+      1.0999999046325684,
+      1.1099998950958252,
+      1.1599998474121094,
+      1.1599998474121094,
+      1.1699998378753662,
+      1.2899998426437378,
+      1.339999794960022,
+      1.679999828338623,
+      1.7899998426437378,
+      1.8199998140335083,
+      1.8499997854232788,
+      1.8799997568130493,
+      1.9099997282028198,
+      1.9399996995925903,
+      1.9899996519088745,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0199997425079346,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0299997329711914,
+      2.0799996852874756,
+      2.0899996757507324,
+      2.189999580383301,
+      2.2199995517730713,
+      2.5899994373321533,
+      2.729999542236328,
+      2.749999523162842,
+      2.8399994373321533
+    ],
+    "type": "longrope"
+  },
   "rope_theta": 10000.0,
-  "sliding_window": 2047,
+  "sliding_window": 262144,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.45.0.dev0",
generation_config.json CHANGED
@@ -2,9 +2,9 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": [
-    32000,
+    32007,
     32001,
-    32007
+    32000
   ],
   "pad_token_id": 32000,
   "transformers_version": "4.45.0.dev0"
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4983793de0ac96315a6f9f7a2bd18a1bebe8938e2af6c164943bbbe94a6e959
+oid sha256:567b7723ea6a425b4e8a511facb5ad710a0b81760fb49dc4d8c7731de2e97e66
 size 1958700256
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e5aba5c0bf7c7cd1a589691478ad19b36b24b150093dd43c920b3825a725063
+oid sha256:36f4e8677d7423d9557ab59d0d3eec5abd0e0349189836398cb753104d0aaf52
 size 1937885224
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29ca0ea2287b7451cac4dd0c3fbd9ae44fd6e428ecad0d6fc3433eaaa9c5aeb3
+oid sha256:f485a2b00a23f8f34f33ce2de5a92148a5b58af3040aa2d7ec33c5429fa4d598
 size 1981925424
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5204ae411c6d1f70937f0bcbb593616ba4404bbe551544009b8085f2d92e635
+oid sha256:52d3e202a2f8ced0888a93ea2d1643cc00b7f45c8507b6d67c36811edb2df55b
 size 1763670920
modeling_phi3.py CHANGED
@@ -149,7 +149,7 @@ class Phi3LongRoPEScaledRotaryEmbedding(Phi3RotaryEmbedding):
 
     @torch.no_grad()
     def forward(self, x, position_ids, seq_len=None):
-        seq_len = torch.max(position_ids) + 1
+        seq_len = seq_len or torch.max(position_ids) + 1
         if seq_len > self.original_max_position_embeddings:
             ext_factors = torch.tensor(self.long_factor, dtype=torch.float32, device=x.device)
         else:
@@ -451,7 +451,7 @@ class Phi3FlashAttention2(Phi3Attention):
             kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
 
         # Because the input can be padded, the absolute sequence length depends on the max position id.
-        rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item()) + 1
+        rotary_seq_len = max(kv_seq_len, position_ids[:, -1].max().item() + 1)
         cos, sin = self.rotary_emb(value_states, position_ids, seq_len=rotary_seq_len)
 
         query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
@@ -1285,6 +1285,13 @@ class Phi3ForCausalLM(Phi3PreTrainedModel):
     def prepare_inputs_for_generation(
         self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
     ):
+        # When the first time input length reached long and short factor switching point, enforce re-compute cache
+        # It will cause downside of slower at this single token position, however, better than current failure.
+        if past_key_values and self.config.rope_scaling and input_ids.shape[1] >= self.config.original_max_position_embeddings + 1:
+            past_length = past_key_values.seen_tokens if isinstance(past_key_values, Cache) else past_key_values[0][0].shape[2]
+            if past_length <= self.config.original_max_position_embeddings:
+                past_key_values = None
+
         if past_key_values is not None:
             if isinstance(past_key_values, Cache):
                 cache_length = past_key_values.get_seq_length()
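Taken together, the three modeling changes target the switch between short_factor and long_factor: forward now honors an explicitly passed seq_len instead of always recomputing it, the FlashAttention path no longer overestimates the sequence length by one, and prepare_inputs_for_generation drops the KV cache the first time the input crosses original_max_position_embeddings so cached keys are re-embedded with long_factor. A toy illustration of the off-by-one being fixed (numbers are made up):

# Toy numbers only: kv_seq_len is a length, the position id is a 0-based index.
kv_seq_len = 4096
last_position_id = 4095

old_rotary_seq_len = max(kv_seq_len, last_position_id) + 1  # 4097: one past the true length
new_rotary_seq_len = max(kv_seq_len, last_position_id + 1)  # 4096: the true length
print(old_rotary_seq_len, new_rotary_seq_len)

With the old expression the rotary embedding could be asked for one position more than actually exists, which could flip it onto the long_factor branch a token early relative to the cached keys.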
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|eot_id|>",
+    "content": "<|end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -98,7 +98,7 @@
       "content": "<|end|>",
       "single_word": false,
       "lstrip": false,
-      "rstrip": true,
+      "rstrip": false,
       "normalized": false,
       "special": true
     },
@@ -128,15 +128,6 @@
       "rstrip": true,
       "normalized": false,
       "special": true
-    },
-    {
-      "id": 32011,
-      "content": "<|eot_id|>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
     }
   ],
   "normalizer": {
tokenizer_config.json CHANGED
@@ -87,7 +87,7 @@
       "content": "<|end|>",
       "lstrip": false,
       "normalized": false,
-      "rstrip": true,
+      "rstrip": false,
       "single_word": false,
       "special": true
     },
@@ -114,22 +114,14 @@
       "rstrip": true,
       "single_word": false,
       "special": true
-    },
-    "32011": {
-      "content": "<|eot_id|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<s>",
-  "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' }}{% endif %}{% endfor %}",
+  "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|system|>\n' + system_message + '<|end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + content + '<|end|>\n<|assistant|>\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end|>' + '\n' }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|eot_id|>",
+  "eos_token": "<|end|>",
   "legacy": false,
-  "model_max_length": 4096,
+  "model_max_length": 131072,
   "pad_token": "<|endoftext|>",
   "padding_side": "left",
   "sp_model_kwargs": {},