owlgebra-ai Claude Opus 4.6 (1M context) committed
Commit eb1ba78 · 1 Parent(s): 06e59e9

Fix thinking tokens in chat + format specifier error


- Strip <think>...</think> blocks from conversation display
- Pass enable_thinking=False to Qwen3 pipeline
- Handle incomplete think blocks (no closing tag)
- Fix f-string format specifier for bool values in reward banner

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
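
For context on the last bullet: in an f-string, everything between the ':' and the closing brace is taken as the format specifier, so the old code handed the entire conditional expression to format(), which raises ValueError: Invalid format specifier. A minimal repro with a made-up metric value:

    v = True  # hypothetical metric value, as a reward-info dict might produce

    try:
        print(f"| {v:.4f if isinstance(v, float) else v} |")  # conditional lands inside the spec
    except ValueError as exc:
        print(exc)  # Invalid format specifier

    # Fixed: the conditional chooses between a pre-formatted string and the
    # raw value, so the ':.4f' spec only ever applies to floats.
    print(f"| {f'{v:.4f}' if isinstance(v, float) else v} |")  # -> | True |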

Files changed (2)
  1. app.py +9 -3
  2. src/shop_rlve/simulator/llm_backend.py +6 -1
app.py CHANGED
@@ -334,7 +334,7 @@ def _fmt_reward_banner(reward: float, info: dict) -> str:
     if details:
         html += "\n\n| Metric | Value |\n|--------|-------|\n"
         for k, v in details.items():
-            html += f"| {k} | {v:.4f if isinstance(v, float) else v} |\n"
+            html += f"| {k} | {f'{v:.4f}' if isinstance(v, float) else v} |\n"
     return html
 
 
@@ -397,12 +397,18 @@ def _fmt_episode(session: SessionState) -> str:
 
 
 def _fmt_chat(session: SessionState) -> list[dict]:
+    import re as _re
     out = []
     for m in session.conversation:
         role = m.get("role", "user")
         content = m.get("content", "")
-        out.append({"role": "user" if role == "user" else "assistant",
-                    "content": content})
+        # Strip any thinking tokens from LLM responses
+        content = _re.sub(r"<think>.*?</think>", "", content, flags=_re.DOTALL)
+        content = _re.sub(r"<think>.*", "", content, flags=_re.DOTALL)
+        content = content.strip()
+        if content:
+            out.append({"role": "user" if role == "user" else "assistant",
+                        "content": content})
     return out
 
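The two-pass stripping added to _fmt_chat can be sanity-checked in isolation; a minimal sketch with invented sample strings:

    import re

    def strip_think(content: str) -> str:
        # Pass 1: remove complete <think>...</think> blocks (non-greedy; DOTALL lets '.' span newlines).
        content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
        # Pass 2: drop an unterminated <think> tail left by a truncated generation.
        content = re.sub(r"<think>.*", "", content, flags=re.DOTALL)
        return content.strip()

    print(strip_think("<think>plan the reply</think>Hello!"))  # -> Hello!
    print(strip_think("Sure.<think>unfinished reasoning"))     # -> Sure.

The pass order matters: the non-greedy pass must run first, since the catch-all <think>.* pattern would otherwise also swallow text that follows a properly closed block.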
src/shop_rlve/simulator/llm_backend.py CHANGED
@@ -93,12 +93,17 @@ def _transformers_generate(
             temperature=max(temperature, 0.01),
             do_sample=True,
             return_full_text=False,
+            enable_thinking=False,  # Disable Qwen3 thinking tokens
         )
         text = result[0]["generated_text"]
         if isinstance(text, list):
             # chat pipeline returns list of message dicts
             text = text[-1].get("content", "") if text else ""
-        text = re.sub(r"<think>.*?</think>", "", str(text), flags=re.DOTALL).strip()
+        text = str(text)
+        # Strip complete and incomplete thinking blocks
+        text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
+        text = re.sub(r"<think>.*", "", text, flags=re.DOTALL)
+        text = text.strip()
         return text if text else None
     except Exception as exc:
         logger.warning("Transformers generation failed: %s", exc)
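
Note that enable_thinking is a Qwen3 chat-template switch rather than a generation argument; the diff passes it through the pipeline call, and the mechanism underneath is the chat template itself. A sketch applying it directly to the tokenizer (the model id is illustrative; any Qwen3 checkpoint exposes the switch):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")

    prompt = tok.apply_chat_template(
        [{"role": "user", "content": "Hi"}],
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Qwen3 template renders the prompt so the model skips its thinking phase
    )
    print(prompt)

Keeping the regex passes as well is a sensible belt-and-braces choice: they catch any <think> tokens that still appear when the template switch is unavailable or ignored.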