Update app.py
app.py CHANGED
```diff
@@ -116,23 +116,22 @@ def retrieve_context(query, max_results=6, max_chars=600):
         return []
 
 def format_conversation(history, system_prompt, tokenizer):
+    if history is None:
+        history = []
+
     if hasattr(tokenizer, "chat_template") and tokenizer.chat_template:
-
-        messages = [{"role": "system", "content": system_prompt.strip()}] + history
-    else:
-        messages = [{"role": "system", "content": system_prompt.strip()}]
+        messages = [{"role": "system", "content": system_prompt.strip()}] + history
         return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=True)
     else:
         # Fallback for base LMs without chat template
         prompt = system_prompt.strip() + "\n"
-
-
-
-
-
-
-
-        prompt += "Assistant: "
+        for msg in history:
+            if msg['role'] == 'user':
+                prompt += "User: " + msg['content'].strip() + "\n"
+            elif msg['role'] == 'assistant':
+                prompt += "Assistant: " + msg['content'].strip() + "\n"
+        if not prompt.strip().endswith("Assistant:"):
+            prompt += "Assistant: "
         return prompt
 
 def chat_response(user_msg, chat_history, system_prompt,
```
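The `history is None` guard is what lets this hunk delete the old empty-history special case: with `history` normalized to `[]`, both `messages = [...] + history` and the fallback loop are safe unconditionally, where `None` would raise a `TypeError` in either. A minimal sketch of both branches, assuming the function is importable from this Space's `app.py`, an OpenAI-style history of `{"role", "content"}` dicts, and a Qwen3-family checkpoint (`enable_thinking` is a Qwen chat-template kwarg; templates that don't define it simply ignore it):

```python
# Illustrative only: exercises both branches of the updated format_conversation.
from transformers import AutoTokenizer

from app import format_conversation  # assumed import; the function defined above

tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")  # placeholder model

history = [
    {"role": "user", "content": "What is RAG?"},
    {"role": "assistant", "content": "Retrieval-augmented generation."},
]

# Chat-template branch: system message prepended, generation prompt appended.
print(format_conversation(history, "You are a helpful assistant.", tok))

# Fallback branch, plus the case the new guard covers: history=None would
# otherwise crash the simplified code; it is now normalized to [].
class BareTokenizer:  # stand-in for a base LM tokenizer without a chat template
    chat_template = None

print(format_conversation(None, "You are a helpful assistant.", BareTokenizer()))
# -> "You are a helpful assistant.\nAssistant: "
```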
```diff
@@ -177,15 +176,7 @@ def chat_response(user_msg, chat_history, system_prompt,
     enriched = system_prompt
 
     pipe = load_pipeline(model_name)
-
-    # TODO:
-    debug += "\nLOAD MODEL:\n" + model_name
     prompt = format_conversation(history, enriched, pipe["tokenizer"])
-
-
-    # TODO:
-    debug += "\nPROMPT:\n" + prompt
-
     prompt_debug = f"\n\n--- Prompt Preview ---\n```\n{prompt}\n```"
     streamer = TextIterStreamer(pipe["tokenizer"],
                                 skip_prompt=True,
```
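This hunk is pure cleanup: the leftover `# TODO:` markers and the `debug +=` writes for `LOAD MODEL` and `PROMPT` duplicated what the `prompt_debug` preview already captures. For context, the streaming setup around them follows the standard threaded pattern; a sketch using the stock `transformers.TextIteratorStreamer`, on the assumption that the app's `TextIterStreamer` is a variant of it:

```python
# Illustrative only: the usual generate-in-a-thread / iterate-in-the-handler
# pattern that skip_prompt=True streaming implies. Model name is a placeholder.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B")

inputs = tok("User: hi\nAssistant: ", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs in a worker thread while the caller drains the
# streamer, a blocking iterator over decoded text chunks.
Thread(
    target=model.generate,
    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=128),
).start()

text = ""
for chunk in streamer:
    text += chunk  # the stock streamer yields deltas, so accumulate
```

One difference worth noting: the loop in the next hunk assigns `text = chunk` rather than accumulating, which suggests `TextIterStreamer` yields the full text generated so far on each iteration.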
```diff
@@ -218,9 +209,6 @@ def chat_response(user_msg, chat_history, system_prompt,
                 break
             text = chunk
 
-            # TODO:
-            debug += "\nRESPONSE:\n" + text
-
             # Detect start of thinking
             if not in_thought and '<think>' in text:
                 in_thought = True
```
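The deletion here removes the last of the `debug +=` breadcrumbs from the token loop; the `<think>` state machine below it is untouched. As background, a sketch of the tag handling such a loop implements, assuming a model that emits `<think>...</think>` ahead of the visible answer (as Qwen3 does with `enable_thinking=True`); the helper name is made up for illustration:

```python
# Illustrative only: split accumulated streamed text into the model's
# reasoning and its visible answer around <think>...</think> tags.
def split_thinking(text: str) -> tuple[str, str]:
    if '<think>' not in text:
        return "", text
    head, _, rest = text.partition('<think>')
    thinking, closed, answer = rest.partition('</think>')
    if not closed:
        # Still inside the thought: show reasoning, no answer yet.
        return thinking.strip(), ""
    return thinking.strip(), (head + answer).strip()

print(split_thinking("<think>User wants an example.</think>Here is one."))
# -> ('User wants an example.', 'Here is one.')
```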