Spaces:
Runtime error
Trying to speed up response generation (set n_batch=512 and n_threads=64; print the CPU core count)
Browse files
model.py
CHANGED
|
@@ -3,12 +3,13 @@ import os
|
|
| 3 |
|
| 4 |
model_path = "llama-3.2-1B-it-Ecommerce-ChatBot-merged-F16.gguf"
|
| 5 |
n_threads = os.cpu_count()
|
|
|
|
| 6 |
|
| 7 |
llm = Llama(
|
| 8 |
model_path=model_path,
|
| 9 |
n_ctx=512,
|
| 10 |
-
n_batch=
|
| 11 |
-
n_threads=
|
| 12 |
n_gpu_layers=-1,
|
| 13 |
chat_format="llama-3"
|
| 14 |
)
|
|
|
|
| 3 |
|
| 4 |
model_path = "llama-3.2-1B-it-Ecommerce-ChatBot-merged-F16.gguf"
|
| 5 |
n_threads = os.cpu_count()
|
| 6 |
+
print(f"number of cpu cores: {n_threads}")
|
| 7 |
|
| 8 |
llm = Llama(
|
| 9 |
model_path=model_path,
|
| 10 |
n_ctx=512,
|
| 11 |
+
n_batch=512,
|
| 12 |
+
n_threads=64,
|
| 13 |
n_gpu_layers=-1,
|
| 14 |
chat_format="llama-3"
|
| 15 |
)
|