Update app.py
Browse files
app.py
CHANGED
|
@@ -283,7 +283,12 @@ def respond(
|
|
| 283 |
top_k: int,
|
| 284 |
repeat_penalty: float,
|
| 285 |
):
|
| 286 |
-
llama = Llama("models/madlad400-3b-mt-q8_0.gguf"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
#tokens = llama.tokenize(f"<2ja>{message}")#
|
| 288 |
tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
|
| 289 |
llama.encode(tokens)
|
|
|
|
| 283 |
top_k: int,
|
| 284 |
repeat_penalty: float,
|
| 285 |
):
|
| 286 |
+
llama = Llama("models/madlad400-3b-mt-q8_0.gguf",flash_attn=False,
|
| 287 |
+
n_gpu_layers=0,
|
| 288 |
+
n_batch=16,
|
| 289 |
+
n_ctx=512,
|
| 290 |
+
n_threads=2,
|
| 291 |
+
n_threads_batch=8,)
|
| 292 |
#tokens = llama.tokenize(f"<2ja>{message}")#
|
| 293 |
tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
|
| 294 |
llama.encode(tokens)
|