Spaces:
Running
Running
Bugfix for think-tag handling: strip <think>…</think> blocks from the displayed assistant response
Browse files
app.py
CHANGED
|
@@ -116,7 +116,6 @@ def validate_or_download_model():
|
|
| 116 |
cleanup_old_models()
|
| 117 |
download_model()
|
| 118 |
|
| 119 |
-
# First load attempt
|
| 120 |
result = try_load_model(model_path)
|
| 121 |
if isinstance(result, str):
|
| 122 |
st.warning(f"Initial load failed: {result}\nAttempting re-download...")
|
|
@@ -153,7 +152,6 @@ st.caption(f"Powered by `llama.cpp` | Model: {selected_model['filename']}")
|
|
| 153 |
user_input = st.chat_input("Ask something...")
|
| 154 |
|
| 155 |
if user_input:
|
| 156 |
-
# Prevent appending user message if assistant hasn't replied yet
|
| 157 |
if len(st.session_state.chat_history) % 2 == 1:
|
| 158 |
st.warning("Please wait for the assistant to respond before sending another message.")
|
| 159 |
else:
|
|
@@ -162,15 +160,12 @@ if user_input:
|
|
| 162 |
with st.chat_message("user"):
|
| 163 |
st.markdown(user_input)
|
| 164 |
|
| 165 |
-
# Trim conversation history to max 8 turns (user+assistant)
|
| 166 |
MAX_TURNS = 8
|
| 167 |
trimmed_history = st.session_state.chat_history[-MAX_TURNS * 2:]
|
| 168 |
messages = [{"role": "system", "content": system_prompt}] + trimmed_history
|
| 169 |
|
| 170 |
with st.chat_message("assistant"):
|
| 171 |
full_response = ""
|
| 172 |
-
response_area = st.empty()
|
| 173 |
-
|
| 174 |
stream = llm.create_chat_completion(
|
| 175 |
messages=messages,
|
| 176 |
max_tokens=max_tokens,
|
|
@@ -185,8 +180,9 @@ if user_input:
|
|
| 185 |
if "choices" in chunk:
|
| 186 |
delta = chunk["choices"][0]["delta"].get("content", "")
|
| 187 |
full_response += delta
|
| 188 |
-
|
| 189 |
-
|
|
|
|
| 190 |
|
| 191 |
st.session_state.chat_history.append({"role": "assistant", "content": full_response})
|
| 192 |
|
|
|
|
| 116 |
cleanup_old_models()
|
| 117 |
download_model()
|
| 118 |
|
|
|
|
| 119 |
result = try_load_model(model_path)
|
| 120 |
if isinstance(result, str):
|
| 121 |
st.warning(f"Initial load failed: {result}\nAttempting re-download...")
|
|
|
|
| 152 |
user_input = st.chat_input("Ask something...")
|
| 153 |
|
| 154 |
if user_input:
|
|
|
|
| 155 |
if len(st.session_state.chat_history) % 2 == 1:
|
| 156 |
st.warning("Please wait for the assistant to respond before sending another message.")
|
| 157 |
else:
|
|
|
|
| 160 |
with st.chat_message("user"):
|
| 161 |
st.markdown(user_input)
|
| 162 |
|
|
|
|
| 163 |
MAX_TURNS = 8
|
| 164 |
trimmed_history = st.session_state.chat_history[-MAX_TURNS * 2:]
|
| 165 |
messages = [{"role": "system", "content": system_prompt}] + trimmed_history
|
| 166 |
|
| 167 |
with st.chat_message("assistant"):
|
| 168 |
full_response = ""
|
|
|
|
|
|
|
| 169 |
stream = llm.create_chat_completion(
|
| 170 |
messages=messages,
|
| 171 |
max_tokens=max_tokens,
|
|
|
|
| 180 |
if "choices" in chunk:
|
| 181 |
delta = chunk["choices"][0]["delta"].get("content", "")
|
| 182 |
full_response += delta
|
| 183 |
+
|
| 184 |
+
visible_response = re.sub(r"<think>.*?</think>", "", full_response, flags=re.DOTALL)
|
| 185 |
+
st.markdown(visible_response)
|
| 186 |
|
| 187 |
st.session_state.chat_history.append({"role": "assistant", "content": full_response})
|
| 188 |
|