It looks like there is an incorrect limit on the model's context length. The fp16 model, like the original one, has a 131072-token context length. Updating this value resolved errors when processing longer prompts.
Browse files- config.json +1 -1
config.json
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
"intermediate_size": 8192,
|
| 21 |
"interpolate_factor": 1,
|
| 22 |
"lm_head_bias": false,
|
| 23 |
-
"max_position_embeddings":
|
| 24 |
"mlp_bias": false,
|
| 25 |
"model_type": "phi3",
|
| 26 |
"num_attention_heads": 24,
|
|
|
|
| 20 |
"intermediate_size": 8192,
|
| 21 |
"interpolate_factor": 1,
|
| 22 |
"lm_head_bias": false,
|
| 23 |
+
"max_position_embeddings": 131072,
|
| 24 |
"mlp_bias": false,
|
| 25 |
"model_type": "phi3",
|
| 26 |
"num_attention_heads": 24,
|