It looks like there is an incorrect limit on the model's context length. The fp16 model, like the original one, has a 131072-token context length. Updating this value resolved errors when processing longer prompts.
Browse files- config.json +1 -1
config.json
CHANGED
|
@@ -20,7 +20,7 @@
|
|
| 20 |
"intermediate_size": 8192,
|
| 21 |
"interpolate_factor": 1,
|
| 22 |
"lm_head_bias": false,
|
| 23 |
-
"max_position_embeddings":
|
| 24 |
"mlp_bias": false,
|
| 25 |
"model_type": "phi3",
|
| 26 |
"num_attention_heads": 24,
|
|
|
|
| 20 |
"intermediate_size": 8192,
|
| 21 |
"interpolate_factor": 1,
|
| 22 |
"lm_head_bias": false,
|
| 23 |
+
"max_position_embeddings": 131072,
|
| 24 |
"mlp_bias": false,
|
| 25 |
"model_type": "phi3",
|
| 26 |
"num_attention_heads": 24,
|