Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -45,29 +45,31 @@ Your job:
|
|
| 45 |
# Chat function (generator for streaming)
|
| 46 |
# ----------------------------
|
| 47 |
def generate_response(message, history):
|
| 48 |
-
|
| 49 |
yield "π€ Thinking..."
|
| 50 |
time.sleep(0.5)
|
| 51 |
|
| 52 |
prompt = f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
|
| 59 |
|
| 60 |
-
# Streaming tokens
|
| 61 |
output = ""
|
| 62 |
-
for token in llm(
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
| 68 |
output += token["choices"][0]["text"]
|
| 69 |
yield output
|
| 70 |
-
|
| 71 |
# ----------------------------
|
| 72 |
# Gradio UI
|
| 73 |
# ----------------------------
|
|
|
|
| 45 |
# Chat function (generator for streaming)
|
| 46 |
# ----------------------------
|
| 47 |
def generate_response(message, history):
    """Stream a reply to ``message`` as a growing string.

    Generator used as the chat callback. It first yields a short
    placeholder, then builds an ``<|im_start|>``/``<|im_end|>``-delimited
    prompt from the system prompt, the prior turns in ``history`` and the
    new user ``message``, and finally yields the accumulated completion
    text after every streamed chunk.

    Args:
        message: Text of the new user turn.
        history: Prior conversation. Either a sequence of
            ``[user, assistant]`` pairs or a sequence of
            ``{"role": ..., "content": ...}`` dicts; ``None`` or empty
            means no prior turns.

    Yields:
        The placeholder string once, then the full response text produced
        so far (not the delta) after each streamed chunk.
    """
    # Placeholder shown while the prompt is built and the model warms up.
    yield "π€ Thinking..."
    time.sleep(0.5)

    # SYSTEM_PROMPT and llm are module-level names defined elsewhere in the file.
    parts = [f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"]
    parts.extend(
        f"<|im_start|>{role}\n{content}<|im_end|>\n"
        for role, content in _iter_history_turns(history)
    )
    # Leave the assistant turn open so the model continues from there.
    parts.append(
        f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    )
    prompt = "".join(parts)

    output = ""
    # NOTE(review): llm is presumably a llama-cpp-python Llama instance;
    # with stream=True the call is iterated and each item is read as
    # item["choices"][0]["text"] -- confirm against its definition.
    for chunk in llm(
        prompt,
        max_tokens=2048,  # increase for longer responses
        temperature=0.2,
        top_p=0.9,
        repeat_penalty=1.1,
        stream=True,
    ):
        output += chunk["choices"][0]["text"]
        yield output


def _iter_history_turns(history):
    """Yield ``(role, content)`` pairs for the prior turns in ``history``.

    Supports both history layouts a chat UI may pass: pair-style entries
    (``[user, assistant]``) and dict-style entries with ``"role"`` and
    ``"content"`` keys. Turns whose content is ``None`` are skipped so the
    literal text "None" never ends up in the prompt.
    """
    for entry in history or ():
        if isinstance(entry, dict):
            # Dict-style entry: indexing it with [0]/[1] would raise KeyError.
            role = entry.get("role")
            content = entry.get("content")
            if role in ("user", "assistant") and content is not None:
                yield role, content
        elif len(entry) >= 2:
            # Pair-style entry: first item is the user turn, second the reply.
            if entry[0] is not None:
                yield "user", entry[0]
            if entry[1] is not None:
                yield "assistant", entry[1]
|
|
|
|
| 73 |
# ----------------------------
|
| 74 |
# Gradio UI
|
| 75 |
# ----------------------------
|