Update app.py
app.py CHANGED

@@ -12,8 +12,8 @@ llama = llama_cpp.Llama.from_pretrained(
     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
     verbose=False,
     n_ctx=4096,
-    n_threads=4,
     n_gpu_layers=0,
+    chat_format="llama-2"
 )
 # Logger setup
 logging.basicConfig(level=logging.INFO)
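For context, the first hunk drops the explicit n_threads=4 (falling back to the library's default thread count) and pins the prompt template with chat_format="llama-2". A minimal sketch of the loader after this change, assuming llama-cpp-python's Llama.from_pretrained API; the repo_id and GGUF filename are placeholders, since the diff does not show the arguments above line 12:

import llama_cpp

llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",  # placeholder; not shown in the diff
    filename="*q4_0.gguf",                  # placeholder quantization pattern
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
    verbose=False,
    n_ctx=4096,
    n_gpu_layers=0,          # keep all layers on CPU
    chat_format="llama-2",   # template applied when formatting chat prompts
)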
@@ -74,7 +74,11 @@ async def complete(
         ],
         temperature=temperature,
         seed=seed,
+        echo=True,
+        stream=True
     )
+    for item in output:
+        print(item['choices'][0]['text'], end='')
     et = time()
     output["time"] = et - st
     return output
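One caveat on the second hunk: in llama-cpp-python, calling the model with stream=True returns a generator of chunks rather than a dict, so the unchanged output["time"] = et - st line will raise a TypeError once the new loop has drained the stream. A sketch of a consumption pattern that keeps the timing, reusing the llama instance sketched above and assuming a completion-style call (the prompt and the result dict are illustrative, not from the diff):

from time import time

st = time()
stream = llama(
    "Q: Name the planets in the solar system. A:",  # illustrative prompt
    max_tokens=64,
    echo=True,
    stream=True,  # yields chunks instead of returning one dict
)

# A generator cannot be indexed or assigned to, so collect the text first.
text = ""
for chunk in stream:
    piece = chunk["choices"][0]["text"]
    print(piece, end="", flush=True)
    text += piece

et = time()
result = {"text": text, "time": et - st}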