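Update app.py: drop the `chat_template_kwargs={"enable_thinking": False}` argument and seed `messages` with a `/nothink` system message instead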
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ model = Llama.from_pretrained(
|
|
| 12 |
)
|
| 13 |
|
| 14 |
def chat(message, history):
|
| 15 |
-
messages = []
|
| 16 |
for user, assistant in history:
|
| 17 |
messages.append({"role": "user", "content": user})
|
| 18 |
messages.append({"role": "assistant", "content": assistant})
|
|
@@ -22,8 +22,7 @@ def chat(message, history):
|
|
| 22 |
for chunk in model.create_chat_completion(
|
| 23 |
messages=messages,
|
| 24 |
max_tokens=2048,
|
| 25 |
-
stream=True
|
| 26 |
-
chat_template_kwargs={"enable_thinking": False}
|
| 27 |
):
|
| 28 |
delta = chunk["choices"][0]["delta"].get("content", "")
|
| 29 |
output += delta
|
|
|
|
| 12 |
)
|
| 13 |
|
| 14 |
def chat(message, history):
|
| 15 |
+
messages = [{"role": "system", "content": "/nothink"}]
|
| 16 |
for user, assistant in history:
|
| 17 |
messages.append({"role": "user", "content": user})
|
| 18 |
messages.append({"role": "assistant", "content": assistant})
|
|
|
|
| 22 |
for chunk in model.create_chat_completion(
|
| 23 |
messages=messages,
|
| 24 |
max_tokens=2048,
|
| 25 |
+
stream=True
|
|
|
|
| 26 |
):
|
| 27 |
delta = chunk["choices"][0]["delta"].get("content", "")
|
| 28 |
output += delta
|