sliding_window updated
Browse files
Traceback (most recent call last):
File "/opt/conda/bin/text-generation-server", line 8, in <module>
sys.exit(app())
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/cli.py", line 83, in serve
server.serve(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 207, in serve
asyncio.run(
File "/opt/conda/lib/python3.9/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 647, in run_until_complete
return future.result()
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 159, in serve_inner
model = get_model(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/__init__.py", line 252, in get_model
return FlashMistral(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/flash_mistral.py", line 312, in __init__
SLIDING_WINDOW_BLOCKS = math.ceil(config.sliding_window / BLOCK_SIZE)
Setting an explicit `sliding_window` value (4096) in config.json avoids this failure when computing SLIDING_WINDOW_BLOCKS.

- config.json +1 -1
|
@@ -16,7 +16,7 @@
|
|
| 16 |
"num_key_value_heads": 8,
|
| 17 |
"rms_norm_eps": 1e-05,
|
| 18 |
"rope_theta": 1000000.0,
|
| 19 |
-
"sliding_window": null,
|
| 20 |
"tie_word_embeddings": false,
|
| 21 |
"torch_dtype": "bfloat16",
|
| 22 |
"transformers_version": "4.36.0",
|
|
|
|
| 16 |
"num_key_value_heads": 8,
|
| 17 |
"rms_norm_eps": 1e-05,
|
| 18 |
"rope_theta": 1000000.0,
|
| 19 |
+
"sliding_window": 4096,
|
| 20 |
"tie_word_embeddings": false,
|
| 21 |
"torch_dtype": "bfloat16",
|
| 22 |
"transformers_version": "4.36.0",
|