owlgebra-ai Claude Opus 4.6 (1M context) committed
Commit · eb1ba78
Parent(s): 06e59e9

Fix thinking tokens in chat + format specifier error

- Strip <think>...</think> blocks from conversation display
- Pass enable_thinking=False to Qwen3 pipeline
- Handle incomplete think blocks (no closing tag)
- Fix f-string format specifier for bool values in reward banner

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed:
- app.py +9 -3
- src/shop_rlve/simulator/llm_backend.py +6 -1
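The format-specifier bullet refers to a Python pitfall: inside an f-string replacement field, everything after the ":" is parsed as the format spec, so a conditional written there is handed to the formatter verbatim instead of being evaluated. A minimal repro of the failure and of the nested-f-string fix this commit uses (the sample values are illustrative):

v = 0.123456

# Before the fix: Python attempts
# format(v, ".4f if isinstance(v, float) else v") and raises at runtime.
try:
    print(f"{v:.4f if isinstance(v, float) else v}")
except ValueError as exc:
    print(f"ValueError: {exc}")

# After the fix: the conditional is an ordinary expression; the nested
# f-string applies ".4f" only to floats, and other types pass through.
print(f"{f'{v:.4f}' if isinstance(v, float) else v}")   # 0.1235
flag = True
print(f"{f'{flag:.4f}' if isinstance(flag, float) else flag}")  # True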
app.py
CHANGED

@@ -334,7 +334,7 @@ def _fmt_reward_banner(reward: float, info: dict) -> str:
     if details:
         html += "\n\n| Metric | Value |\n|--------|-------|\n"
         for k, v in details.items():
-            html += f"| {k} | {v:.4f if isinstance(v, float) else v} |\n"
+            html += f"| {k} | {f'{v:.4f}' if isinstance(v, float) else v} |\n"
     return html


@@ -397,12 +397,18 @@ def _fmt_episode(session: SessionState) -> str:


 def _fmt_chat(session: SessionState) -> list[dict]:
+    import re as _re
     out = []
     for m in session.conversation:
         role = m.get("role", "user")
         content = m.get("content", "")
-        out.append({"role": "user" if role == "user" else "assistant",
-                    "content": content})
+        # Strip any thinking tokens from LLM responses
+        content = _re.sub(r"<think>.*?</think>", "", content, flags=_re.DOTALL)
+        content = _re.sub(r"<think>.*", "", content, flags=_re.DOTALL)
+        content = content.strip()
+        if content:
+            out.append({"role": "user" if role == "user" else "assistant",
+                        "content": content})
     return out
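The two substitutions in _fmt_chat act as a pair: the non-greedy pattern removes well-formed <think>...</think> blocks, and the second pattern drops everything from an unterminated <think> onward, which covers responses cut off mid-reasoning. A standalone sketch of the same logic (the helper name and sample strings are illustrative, not from the repo):

import re

def strip_think(content: str) -> str:
    # Pass 1: remove complete <think>...</think> blocks; non-greedy and
    # DOTALL so reasoning that spans several lines is matched.
    content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
    # Pass 2: an unterminated <think> swallows the rest of the message.
    content = re.sub(r"<think>.*", "", content, flags=re.DOTALL)
    return content.strip()

print(strip_think("<think>plan the reply\nstep by step</think>Here is the answer."))
# -> Here is the answer.
print(strip_think("Partial reply <think>reasoning cut off mid-stream"))
# -> Partial reply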
src/shop_rlve/simulator/llm_backend.py
CHANGED

@@ -93,12 +93,17 @@ def _transformers_generate(
             temperature=max(temperature, 0.01),
             do_sample=True,
             return_full_text=False,
+            enable_thinking=False,  # Disable Qwen3 thinking tokens
         )
         text = result[0]["generated_text"]
         if isinstance(text, list):
             # chat pipeline returns list of message dicts
             text = text[-1].get("content", "") if text else ""
-        text = str(text).strip()
+        text = str(text)
+        # Strip complete and incomplete thinking blocks
+        text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
+        text = re.sub(r"<think>.*", "", text, flags=re.DOTALL)
+        text = text.strip()
         return text if text else None
     except Exception as exc:
         logger.warning("Transformers generation failed: %s", exc)
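For background on enable_thinking=False: Qwen3's model card documents it as an argument to tokenizer.apply_chat_template rather than to generate, so whether a transformers pipeline call forwards the kwarg to the chat template depends on the library version, which is presumably why this commit also strips <think> tags defensively after generation. A sketch of the documented template-level path, with the checkpoint name and prompt as placeholder assumptions:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-0.6B"  # assumption: any Qwen3 chat checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

messages = [{"role": "user", "content": "Say hello."}]
# enable_thinking=False makes the chat template close the think block
# in the prompt itself, so the model skips <think>...</think> output.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
# Decode only the newly generated tokens, not the prompt.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))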