Mirrowel committed on
Commit
bccb879
·
1 Parent(s): 1450294

refactor(antigravity): 🔨 migrate thinking sanitization to gemini message format

Browse files

This commit refactors the Claude thinking sanitization logic to operate on Gemini-format messages after transformation, rather than before. This enables the sanitization to work with the full message context including thinking blocks that were restored from cache during the transformation process.

Key changes:
- Move `_sanitize_thinking_for_claude` call to after `_transform_messages` instead of before
- Update all thinking detection and manipulation methods to work with Gemini format (role "model", "parts" array with "thought": true)
- Refactor `_analyze_turn_state` to detect tool results as user messages with "functionResponse" parts
- Update `_message_has_thinking` to check for "thought": true in parts array
- Add new `_message_has_tool_calls` helper for Gemini format detection
- Refactor `_strip_all_thinking_blocks` to filter parts with "thought": true
- Update `_strip_old_turn_thinking` and `_preserve_turn_start_thinking` for Gemini format
- Refactor `_looks_like_compacted_thinking_turn` to detect functionCall parts without thinking
- Update `_recover_thinking_from_cache` to inject thinking as Gemini-format part with "thought": true
- Refactor `_close_tool_loop_for_thinking` to use Gemini message structure
- Update all docstrings and comments to reflect "model" role instead of "assistant"

This change fixes issues where context compression or client-side stripping of reasoning_content would prevent proper thinking sanitization, as the sanitization now occurs after the transformation has restored thinking from cache.

src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -1178,20 +1178,27 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1178
  Returns:
1179
  {
1180
  "in_tool_loop": bool - True if we're in an incomplete tool use loop
1181
- "turn_start_idx": int - Index of first assistant message in current turn
1182
  "turn_has_thinking": bool - Whether the TURN started with thinking
1183
- "last_assistant_idx": int - Index of last assistant message
1184
- "last_assistant_has_thinking": bool - Whether last assistant msg has thinking
1185
- "last_assistant_has_tool_calls": bool - Whether last assistant msg has tool calls
1186
- "pending_tool_results": bool - Whether there are tool results after last assistant
1187
  "thinking_block_indices": List[int] - Indices of messages with thinking/reasoning
1188
  }
 
 
 
 
 
 
 
1189
  """
1190
  state = {
1191
  "in_tool_loop": False,
1192
  "turn_start_idx": -1,
1193
  "turn_has_thinking": False,
1194
- "last_assistant_idx": -1,
1195
  "last_assistant_has_thinking": False,
1196
  "last_assistant_has_tool_calls": False,
1197
  "pending_tool_results": False,
@@ -1199,25 +1206,16 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1199
  }
1200
 
1201
  # First pass: Find the last "real" user message (not a tool result)
1202
- # A real user message is one that doesn't immediately follow an assistant with tool_calls
1203
  last_real_user_idx = -1
1204
  for i, msg in enumerate(messages):
1205
  role = msg.get("role")
1206
  if role == "user":
1207
- # Check if this is a real user message or just follows tool results
1208
- # Tool messages have role="tool", so if this is role="user" and
1209
- # it's not just a tool_result container, it's a real user message.
1210
- # However, we need to be careful: the client might format tool results
1211
- # as user messages with tool_result content. Check the content.
1212
- content = msg.get("content")
1213
-
1214
- # If content is a list with tool_result items, it's a tool response
1215
- is_tool_result_msg = False
1216
- if isinstance(content, list):
1217
- for item in content:
1218
- if isinstance(item, dict) and item.get("type") == "tool_result":
1219
- is_tool_result_msg = True
1220
- break
1221
 
1222
  if not is_tool_result_msg:
1223
  last_real_user_idx = i
@@ -1226,52 +1224,71 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1226
  for i, msg in enumerate(messages):
1227
  role = msg.get("role")
1228
 
1229
- if role == "assistant":
1230
- # Check for thinking/reasoning content
1231
  has_thinking = self._message_has_thinking(msg)
1232
 
 
 
 
 
 
 
1233
  # Track if this is the turn start
1234
  if i > last_real_user_idx and state["turn_start_idx"] == -1:
1235
  state["turn_start_idx"] = i
1236
  state["turn_has_thinking"] = has_thinking
1237
 
1238
  state["last_assistant_idx"] = i
1239
- state["last_assistant_has_tool_calls"] = bool(msg.get("tool_calls"))
1240
  state["last_assistant_has_thinking"] = has_thinking
1241
 
1242
  if has_thinking:
1243
  state["thinking_block_indices"].append(i)
1244
 
1245
- elif role == "tool":
1246
- # Tool result after an assistant message with tool calls = in tool loop
1247
- if state["last_assistant_has_tool_calls"]:
 
 
 
 
 
1248
  state["pending_tool_results"] = True
1249
 
1250
  # We're in a tool loop if:
1251
  # 1. There are pending tool results
1252
- # 2. The conversation ends with tool results (last message is "tool" role)
1253
  if state["pending_tool_results"] and messages:
1254
  last_msg = messages[-1]
1255
- if last_msg.get("role") == "tool":
1256
- state["in_tool_loop"] = True
 
 
 
 
 
1257
 
1258
  return state
1259
 
1260
  def _message_has_thinking(self, msg: Dict[str, Any]) -> bool:
1261
- """Check if an assistant message contains thinking/reasoning content."""
1262
- # Check reasoning_content field (OpenAI format)
1263
- if msg.get("reasoning_content"):
1264
- return True
1265
-
1266
- # Check for thinking in content array (some formats)
1267
- content = msg.get("content")
1268
- if isinstance(content, list):
1269
- for item in content:
1270
- if isinstance(item, dict) and item.get("type") == "thinking":
1271
- return True
1272
 
 
 
 
 
 
 
 
1273
  return False
1274
 
 
 
 
 
 
1275
  def _sanitize_thinking_for_claude(
1276
  self, messages: List[Dict[str, Any]], thinking_enabled: bool
1277
  ) -> Tuple[List[Dict[str, Any]], bool]:
@@ -1403,7 +1420,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1403
  state["last_assistant_has_tool_calls"]
1404
  and not state["turn_has_thinking"]
1405
  ):
1406
- # The turn has tool_calls but no thinking at turn start.
1407
  # This could be:
1408
  # 1. Compaction removed the thinking block
1409
  # 2. The original call was made without thinking
@@ -1412,7 +1429,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1412
  # For case 2, we let the model respond naturally.
1413
  #
1414
  # We can detect case 1 if there's evidence thinking was expected:
1415
- # - The turn_start message has tool_calls (typical thinking-enabled flow)
1416
  # - The content structure suggests a thinking block was stripped
1417
 
1418
  # Check if turn_start has the hallmarks of a compacted thinking response
@@ -1436,18 +1453,21 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1436
  messages, state["turn_start_idx"]
1437
  ), False
1438
  else:
1439
- # Can't recover - add synthetic user to start fresh turn
1440
  lib_logger.info(
1441
  "[Thinking Sanitization] Detected compacted turn missing thinking block. "
1442
  "Adding synthetic user message to start fresh thinking turn."
1443
  )
1444
  # Add synthetic user message to trigger new turn with thinking
1445
- synthetic_user = {"role": "user", "content": "[Continue]"}
 
 
 
1446
  messages.append(synthetic_user)
1447
  return self._strip_all_thinking_blocks(messages), False
1448
  else:
1449
  lib_logger.debug(
1450
- "[Thinking Sanitization] Last assistant has tool_calls but no thinking. "
1451
  "This is likely from context compression or non-thinking model. "
1452
  "New response will include thinking naturally."
1453
  )
@@ -1460,75 +1480,80 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1460
  def _strip_all_thinking_blocks(
1461
  self, messages: List[Dict[str, Any]]
1462
  ) -> List[Dict[str, Any]]:
1463
- """Remove all thinking/reasoning content from messages."""
1464
- for msg in messages:
1465
- if msg.get("role") == "assistant":
1466
- # Remove reasoning_content field
1467
- msg.pop("reasoning_content", None)
1468
 
1469
- # Remove thinking blocks from content array
1470
- content = msg.get("content")
1471
- if isinstance(content, list):
 
 
 
 
 
 
1472
  filtered = [
1473
- item
1474
- for item in content
1475
- if not (
1476
- isinstance(item, dict) and item.get("type") == "thinking"
1477
- )
1478
  ]
1479
- # If filtering leaves empty list, we need to preserve message structure
1480
- # to maintain user/assistant alternation. Use empty string as placeholder
1481
- # (will result in empty "text" part which is valid).
 
 
 
1482
  if not filtered:
1483
- # Only if there are no tool_calls either - otherwise message is valid
1484
- if not msg.get("tool_calls"):
1485
- msg["content"] = ""
1486
  else:
1487
- msg["content"] = (
1488
- None # tool_calls exist, content not needed
1489
- )
1490
  else:
1491
- msg["content"] = filtered
1492
  return messages
1493
 
1494
  def _strip_old_turn_thinking(
1495
- self, messages: List[Dict[str, Any]], last_assistant_idx: int
1496
  ) -> List[Dict[str, Any]]:
1497
  """
1498
- Strip thinking from old turns but preserve for the last assistant turn.
1499
 
1500
  Per Claude docs: "thinking blocks from previous turns are removed from context"
1501
  This mimics the API behavior and prevents issues.
 
 
1502
  """
1503
  for i, msg in enumerate(messages):
1504
- if msg.get("role") == "assistant" and i < last_assistant_idx:
1505
- # Old turn - strip thinking
1506
- msg.pop("reasoning_content", None)
1507
- content = msg.get("content")
1508
- if isinstance(content, list):
1509
  filtered = [
1510
- item
1511
- for item in content
1512
- if not (
1513
- isinstance(item, dict) and item.get("type") == "thinking"
1514
- )
1515
  ]
1516
- # Preserve message structure with empty string if needed
 
 
 
 
1517
  if not filtered:
1518
- msg["content"] = "" if not msg.get("tool_calls") else None
1519
  else:
1520
- msg["content"] = filtered
1521
  return messages
1522
 
1523
  def _preserve_current_turn_thinking(
1524
- self, messages: List[Dict[str, Any]], last_assistant_idx: int
1525
  ) -> List[Dict[str, Any]]:
1526
  """
1527
- Preserve thinking only for the current (last) assistant turn.
1528
  Strip from all previous turns.
1529
  """
1530
  # Same as strip_old_turn_thinking - we keep the last turn intact
1531
- return self._strip_old_turn_thinking(messages, last_assistant_idx)
1532
 
1533
  def _preserve_turn_start_thinking(
1534
  self, messages: List[Dict[str, Any]], turn_start_idx: int
@@ -1536,65 +1561,66 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1536
  """
1537
  Preserve thinking at the turn start message.
1538
 
1539
- In multi-message tool loops, the thinking block is at the FIRST assistant
1540
  message of the turn (turn_start_idx), not the last one. We need to preserve
1541
  thinking from the turn start, and strip it from all older turns.
 
 
1542
  """
1543
  for i, msg in enumerate(messages):
1544
- if msg.get("role") == "assistant" and i < turn_start_idx:
1545
- # Old turn - strip thinking
1546
- msg.pop("reasoning_content", None)
1547
- content = msg.get("content")
1548
- if isinstance(content, list):
1549
  filtered = [
1550
- item
1551
- for item in content
1552
- if not (
1553
- isinstance(item, dict) and item.get("type") == "thinking"
1554
- )
1555
  ]
 
 
 
 
 
1556
  if not filtered:
1557
- msg["content"] = "" if not msg.get("tool_calls") else None
1558
  else:
1559
- msg["content"] = filtered
1560
  return messages
1561
 
1562
  def _looks_like_compacted_thinking_turn(self, msg: Dict[str, Any]) -> bool:
1563
  """
1564
  Detect if a message looks like it was compacted from a thinking-enabled turn.
1565
 
1566
- Heuristics:
1567
- 1. Has tool_calls (typical thinking flow produces tool calls)
1568
- 2. Content structure suggests stripped thinking (e.g., starts with tool_use directly)
1569
- 3. No text content before tool_use (thinking responses usually have text)
1570
 
1571
  This is imperfect but helps catch common compaction scenarios.
1572
  """
1573
- if not msg.get("tool_calls"):
 
1574
  return False
1575
 
1576
- content = msg.get("content")
 
 
1577
 
1578
- # If content is just tool_use blocks with no text, it might be compacted
1579
- if isinstance(content, list):
1580
- has_text = any(
1581
- isinstance(item, dict)
1582
- and item.get("type") == "text"
1583
- and item.get("text", "").strip()
1584
- for item in content
1585
- )
1586
- has_tool_use = any(
1587
- isinstance(item, dict) and item.get("type") == "tool_use"
1588
- for item in content
1589
- )
1590
 
1591
- # Typical compacted thinking: tool_use without preceding text
1592
- # Normal non-thinking response would have explanatory text
1593
- if has_tool_use and not has_text:
1594
- return True
 
 
 
 
1595
 
1596
- # If content is empty/None but has tool_calls, likely compacted
1597
- if not content and msg.get("tool_calls"):
1598
  return True
1599
 
1600
  return False
@@ -1605,17 +1631,38 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1605
  """
1606
  Try to recover thinking content from cache for a compacted turn.
1607
 
 
 
 
1608
  Returns True if thinking was successfully recovered and injected, False otherwise.
1609
  """
1610
  if turn_start_idx < 0 or turn_start_idx >= len(messages):
1611
  return False
1612
 
1613
  msg = messages[turn_start_idx]
 
1614
 
1615
- # Extract tool_calls for cache key lookup
1616
- tool_calls = msg.get("tool_calls", [])
1617
- content = msg.get("content", "")
1618
- text_content = content if isinstance(content, str) else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1619
 
1620
  # Generate cache key and try to retrieve
1621
  cache_key = self._generate_thinking_cache_key(text_content, tool_calls)
@@ -1640,19 +1687,14 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1640
  )
1641
  return False
1642
 
1643
- # Inject the recovered thinking block
1644
- thinking_block = {
1645
- "type": "thinking",
1646
- "thinking": thinking_text,
1647
- "signature": signature,
1648
  }
1649
 
1650
- if isinstance(content, list):
1651
- msg["content"] = [thinking_block] + content
1652
- elif isinstance(content, str):
1653
- msg["content"] = [thinking_block, {"type": "text", "text": content}]
1654
- else:
1655
- msg["content"] = [thinking_block]
1656
 
1657
  lib_logger.debug(
1658
  f"[Thinking Sanitization] Recovered thinking from cache: {len(thinking_text)} chars"
@@ -1672,7 +1714,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1672
  Close an incomplete tool loop by injecting synthetic messages to start a new turn.
1673
 
1674
  This is used when:
1675
- - We're in a tool loop (conversation ends with tool_result)
1676
  - The tool call was made WITHOUT thinking (e.g., by Gemini, non-thinking Claude, or compaction stripped it)
1677
  - We NOW want to enable thinking
1678
 
@@ -1681,8 +1723,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1681
  - "To toggle thinking, you must complete the assistant turn first"
1682
  - A non-tool-result user message ends the turn and allows a fresh start
1683
 
1684
- Solution:
1685
- 1. Add synthetic ASSISTANT message to complete the non-thinking turn
1686
  2. Add synthetic USER message to start a NEW turn
1687
  3. Claude will generate thinking for its response to the new turn
1688
 
@@ -1692,47 +1734,61 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1692
  # Strip any old thinking first
1693
  messages = self._strip_all_thinking_blocks(messages)
1694
 
1695
- # Collect tool results from the end of the conversation
1696
- tool_results = []
1697
  for msg in reversed(messages):
1698
- if msg.get("role") == "tool":
1699
- tool_results.append(msg)
1700
- elif msg.get("role") == "assistant":
1701
- break # Stop at the assistant that made the tool calls
1702
-
1703
- tool_results.reverse() # Put back in order
 
 
 
 
 
 
 
 
 
 
 
1704
 
1705
  # Safety check: if no tool results found, this shouldn't have been called
1706
  # But handle gracefully with a generic message
1707
- if not tool_results:
1708
  lib_logger.warning(
1709
  "[Thinking Sanitization] _close_tool_loop_for_thinking called but no tool results found. "
1710
  "This may indicate malformed conversation history."
1711
  )
1712
- synthetic_assistant_content = "[Processing previous context.]"
1713
- elif len(tool_results) == 1:
1714
- synthetic_assistant_content = "[Tool execution completed.]"
1715
  else:
1716
- synthetic_assistant_content = (
1717
- f"[{len(tool_results)} tool executions completed.]"
1718
  )
1719
 
1720
- # Step 1: Inject synthetic ASSISTANT message to complete the non-thinking turn
1721
- synthetic_assistant = {
1722
- "role": "assistant",
1723
- "content": synthetic_assistant_content,
1724
  }
1725
- messages.append(synthetic_assistant)
1726
 
1727
- # Step 2: Inject synthetic USER message to start a NEW turn
1728
  # This allows Claude to generate thinking for its response
1729
  # The message is minimal and unobtrusive - just triggers a new turn
1730
- synthetic_user = {"role": "user", "content": "[Continue]"}
 
 
 
1731
  messages.append(synthetic_user)
1732
 
1733
  lib_logger.info(
1734
  f"[Thinking Sanitization] Closed tool loop with synthetic messages. "
1735
- f"Assistant: '{synthetic_assistant_content}', User: '[Continue]'. "
1736
  f"Claude will now start a fresh turn with thinking enabled."
1737
  )
1738
 
@@ -2981,13 +3037,18 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
2981
  reasoning_effort is not None and reasoning_effort != "disable"
2982
  )
2983
 
2984
- # Sanitize thinking blocks for Claude to prevent 400 errors
 
 
 
 
 
 
2985
  # This handles: context compression, model switching, mid-turn thinking toggle
2986
- # Returns (sanitized_messages, force_disable_thinking)
2987
  force_disable_thinking = False
2988
  if self._is_claude(model) and self._enable_thinking_sanitization:
2989
- messages, force_disable_thinking = self._sanitize_thinking_for_claude(
2990
- messages, thinking_enabled
2991
  )
2992
 
2993
  # If we're in a mid-turn thinking toggle situation, we MUST disable thinking
@@ -2996,10 +3057,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
2996
  thinking_enabled = False
2997
  reasoning_effort = "disable" # Force disable for this request
2998
 
2999
- # Transform messages
3000
- system_instruction, gemini_contents = self._transform_messages(messages, model)
3001
- gemini_contents = self._fix_tool_response_grouping(gemini_contents)
3002
-
3003
  # Build payload
3004
  gemini_payload = {"contents": gemini_contents}
3005
 
 
1178
  Returns:
1179
  {
1180
  "in_tool_loop": bool - True if we're in an incomplete tool use loop
1181
+ "turn_start_idx": int - Index of first model message in current turn
1182
  "turn_has_thinking": bool - Whether the TURN started with thinking
1183
+ "last_model_idx": int - Index of last model message
1184
+ "last_model_has_thinking": bool - Whether last model msg has thinking
1185
+ "last_model_has_tool_calls": bool - Whether last model msg has tool calls
1186
+ "pending_tool_results": bool - Whether there are tool results after last model
1187
  "thinking_block_indices": List[int] - Indices of messages with thinking/reasoning
1188
  }
1189
+
1190
+ NOTE: This now operates on Gemini-format messages (after transformation):
1191
+ - Role "model" instead of "assistant"
1192
+ - Role "user" for both user messages AND tool results (with functionResponse)
1193
+ - "parts" array with "thought": true for thinking
1194
+ - "parts" array with "functionCall" for tool calls
1195
+ - "parts" array with "functionResponse" for tool results
1196
  """
1197
  state = {
1198
  "in_tool_loop": False,
1199
  "turn_start_idx": -1,
1200
  "turn_has_thinking": False,
1201
+ "last_assistant_idx": -1, # Keep name for compatibility
1202
  "last_assistant_has_thinking": False,
1203
  "last_assistant_has_tool_calls": False,
1204
  "pending_tool_results": False,
 
1206
  }
1207
 
1208
  # First pass: Find the last "real" user message (not a tool result)
1209
+ # In Gemini format, tool results are "user" role with functionResponse parts
1210
  last_real_user_idx = -1
1211
  for i, msg in enumerate(messages):
1212
  role = msg.get("role")
1213
  if role == "user":
1214
+ # Check if this is a real user message or a tool result container
1215
+ parts = msg.get("parts", [])
1216
+ is_tool_result_msg = any(
1217
+ isinstance(p, dict) and "functionResponse" in p for p in parts
1218
+ )
 
 
 
 
 
 
 
 
 
1219
 
1220
  if not is_tool_result_msg:
1221
  last_real_user_idx = i
 
1224
  for i, msg in enumerate(messages):
1225
  role = msg.get("role")
1226
 
1227
+ if role == "model":
1228
+ # Check for thinking/reasoning content (Gemini format)
1229
  has_thinking = self._message_has_thinking(msg)
1230
 
1231
+ # Check for tool calls (functionCall in parts)
1232
+ parts = msg.get("parts", [])
1233
+ has_tool_calls = any(
1234
+ isinstance(p, dict) and "functionCall" in p for p in parts
1235
+ )
1236
+
1237
  # Track if this is the turn start
1238
  if i > last_real_user_idx and state["turn_start_idx"] == -1:
1239
  state["turn_start_idx"] = i
1240
  state["turn_has_thinking"] = has_thinking
1241
 
1242
  state["last_assistant_idx"] = i
1243
+ state["last_assistant_has_tool_calls"] = has_tool_calls
1244
  state["last_assistant_has_thinking"] = has_thinking
1245
 
1246
  if has_thinking:
1247
  state["thinking_block_indices"].append(i)
1248
 
1249
+ elif role == "user":
1250
+ # Check if this is a tool result (functionResponse in parts)
1251
+ parts = msg.get("parts", [])
1252
+ is_tool_result = any(
1253
+ isinstance(p, dict) and "functionResponse" in p for p in parts
1254
+ )
1255
+
1256
+ if is_tool_result and state["last_assistant_has_tool_calls"]:
1257
  state["pending_tool_results"] = True
1258
 
1259
  # We're in a tool loop if:
1260
  # 1. There are pending tool results
1261
+ # 2. The conversation ends with tool results (last message is user with functionResponse)
1262
  if state["pending_tool_results"] and messages:
1263
  last_msg = messages[-1]
1264
+ if last_msg.get("role") == "user":
1265
+ parts = last_msg.get("parts", [])
1266
+ ends_with_tool_result = any(
1267
+ isinstance(p, dict) and "functionResponse" in p for p in parts
1268
+ )
1269
+ if ends_with_tool_result:
1270
+ state["in_tool_loop"] = True
1271
 
1272
  return state
1273
 
1274
  def _message_has_thinking(self, msg: Dict[str, Any]) -> bool:
1275
+ """
1276
+ Check if a message contains thinking/reasoning content.
 
 
 
 
 
 
 
 
 
1277
 
1278
+ Handles GEMINI format (after transformation):
1279
+ - "parts" array with items having "thought": true
1280
+ """
1281
+ parts = msg.get("parts", [])
1282
+ for part in parts:
1283
+ if isinstance(part, dict) and part.get("thought") is True:
1284
+ return True
1285
  return False
1286
 
1287
+ def _message_has_tool_calls(self, msg: Dict[str, Any]) -> bool:
1288
+ """Check if a message contains tool calls (Gemini format)."""
1289
+ parts = msg.get("parts", [])
1290
+ return any(isinstance(p, dict) and "functionCall" in p for p in parts)
1291
+
1292
  def _sanitize_thinking_for_claude(
1293
  self, messages: List[Dict[str, Any]], thinking_enabled: bool
1294
  ) -> Tuple[List[Dict[str, Any]], bool]:
 
1420
  state["last_assistant_has_tool_calls"]
1421
  and not state["turn_has_thinking"]
1422
  ):
1423
+ # The turn has functionCall but no thinking at turn start.
1424
  # This could be:
1425
  # 1. Compaction removed the thinking block
1426
  # 2. The original call was made without thinking
 
1429
  # For case 2, we let the model respond naturally.
1430
  #
1431
  # We can detect case 1 if there's evidence thinking was expected:
1432
+ # - The turn_start message has functionCall (typical thinking-enabled flow)
1433
  # - The content structure suggests a thinking block was stripped
1434
 
1435
  # Check if turn_start has the hallmarks of a compacted thinking response
 
1453
  messages, state["turn_start_idx"]
1454
  ), False
1455
  else:
1456
+ # Can't recover - add synthetic user to start fresh turn (Gemini format)
1457
  lib_logger.info(
1458
  "[Thinking Sanitization] Detected compacted turn missing thinking block. "
1459
  "Adding synthetic user message to start fresh thinking turn."
1460
  )
1461
  # Add synthetic user message to trigger new turn with thinking
1462
+ synthetic_user = {
1463
+ "role": "user",
1464
+ "parts": [{"text": "[Continue]"}],
1465
+ }
1466
  messages.append(synthetic_user)
1467
  return self._strip_all_thinking_blocks(messages), False
1468
  else:
1469
  lib_logger.debug(
1470
+ "[Thinking Sanitization] Last model has functionCall but no thinking. "
1471
  "This is likely from context compression or non-thinking model. "
1472
  "New response will include thinking naturally."
1473
  )
 
1480
  def _strip_all_thinking_blocks(
1481
  self, messages: List[Dict[str, Any]]
1482
  ) -> List[Dict[str, Any]]:
1483
+ """
1484
+ Remove all thinking/reasoning content from messages.
 
 
 
1485
 
1486
+ Handles GEMINI format (after transformation):
1487
+ - Role "model" instead of "assistant"
1488
+ - "parts" array with "thought": true for thinking
1489
+ """
1490
+ for msg in messages:
1491
+ if msg.get("role") == "model":
1492
+ parts = msg.get("parts", [])
1493
+ if parts:
1494
+ # Filter out thinking parts (those with "thought": true)
1495
  filtered = [
1496
+ p
1497
+ for p in parts
1498
+ if not (isinstance(p, dict) and p.get("thought") is True)
 
 
1499
  ]
1500
+
1501
+ # Check if there are still functionCalls remaining
1502
+ has_function_calls = any(
1503
+ isinstance(p, dict) and "functionCall" in p for p in filtered
1504
+ )
1505
+
1506
  if not filtered:
1507
+ # All parts were thinking - need placeholder for valid structure
1508
+ if not has_function_calls:
1509
+ msg["parts"] = [{"text": ""}]
1510
  else:
1511
+ msg["parts"] = [] # Will be invalid, but shouldn't happen
 
 
1512
  else:
1513
+ msg["parts"] = filtered
1514
  return messages
1515
 
1516
  def _strip_old_turn_thinking(
1517
+ self, messages: List[Dict[str, Any]], last_model_idx: int
1518
  ) -> List[Dict[str, Any]]:
1519
  """
1520
+ Strip thinking from old turns but preserve for the last model turn.
1521
 
1522
  Per Claude docs: "thinking blocks from previous turns are removed from context"
1523
  This mimics the API behavior and prevents issues.
1524
+
1525
+ Handles GEMINI format: role "model", "parts" with "thought": true
1526
  """
1527
  for i, msg in enumerate(messages):
1528
+ if msg.get("role") == "model" and i < last_model_idx:
1529
+ # Old turn - strip thinking parts
1530
+ parts = msg.get("parts", [])
1531
+ if parts:
 
1532
  filtered = [
1533
+ p
1534
+ for p in parts
1535
+ if not (isinstance(p, dict) and p.get("thought") is True)
 
 
1536
  ]
1537
+
1538
+ has_function_calls = any(
1539
+ isinstance(p, dict) and "functionCall" in p for p in filtered
1540
+ )
1541
+
1542
  if not filtered:
1543
+ msg["parts"] = [{"text": ""}] if not has_function_calls else []
1544
  else:
1545
+ msg["parts"] = filtered
1546
  return messages
1547
 
1548
  def _preserve_current_turn_thinking(
1549
+ self, messages: List[Dict[str, Any]], last_model_idx: int
1550
  ) -> List[Dict[str, Any]]:
1551
  """
1552
+ Preserve thinking only for the current (last) model turn.
1553
  Strip from all previous turns.
1554
  """
1555
  # Same as strip_old_turn_thinking - we keep the last turn intact
1556
+ return self._strip_old_turn_thinking(messages, last_model_idx)
1557
 
1558
  def _preserve_turn_start_thinking(
1559
  self, messages: List[Dict[str, Any]], turn_start_idx: int
 
1561
  """
1562
  Preserve thinking at the turn start message.
1563
 
1564
+ In multi-message tool loops, the thinking block is at the FIRST model
1565
  message of the turn (turn_start_idx), not the last one. We need to preserve
1566
  thinking from the turn start, and strip it from all older turns.
1567
+
1568
+ Handles GEMINI format: role "model", "parts" with "thought": true
1569
  """
1570
  for i, msg in enumerate(messages):
1571
+ if msg.get("role") == "model" and i < turn_start_idx:
1572
+ # Old turn - strip thinking parts
1573
+ parts = msg.get("parts", [])
1574
+ if parts:
 
1575
  filtered = [
1576
+ p
1577
+ for p in parts
1578
+ if not (isinstance(p, dict) and p.get("thought") is True)
 
 
1579
  ]
1580
+
1581
+ has_function_calls = any(
1582
+ isinstance(p, dict) and "functionCall" in p for p in filtered
1583
+ )
1584
+
1585
  if not filtered:
1586
+ msg["parts"] = [{"text": ""}] if not has_function_calls else []
1587
  else:
1588
+ msg["parts"] = filtered
1589
  return messages
1590
 
1591
  def _looks_like_compacted_thinking_turn(self, msg: Dict[str, Any]) -> bool:
1592
  """
1593
  Detect if a message looks like it was compacted from a thinking-enabled turn.
1594
 
1595
+ Heuristics (GEMINI format):
1596
+ 1. Has functionCall parts (typical thinking flow produces tool calls)
1597
+ 2. No thinking parts (thought: true)
1598
+ 3. No text content before functionCall (thinking responses usually have text)
1599
 
1600
  This is imperfect but helps catch common compaction scenarios.
1601
  """
1602
+ parts = msg.get("parts", [])
1603
+ if not parts:
1604
  return False
1605
 
1606
+ has_function_call = any(
1607
+ isinstance(p, dict) and "functionCall" in p for p in parts
1608
+ )
1609
 
1610
+ if not has_function_call:
1611
+ return False
 
 
 
 
 
 
 
 
 
 
1612
 
1613
+ # Check for text content (not thinking)
1614
+ has_text = any(
1615
+ isinstance(p, dict)
1616
+ and "text" in p
1617
+ and p.get("text", "").strip()
1618
+ and not p.get("thought") # Exclude thinking text
1619
+ for p in parts
1620
+ )
1621
 
1622
+ # If we have functionCall but no non-thinking text, likely compacted
1623
+ if not has_text:
1624
  return True
1625
 
1626
  return False
 
1631
  """
1632
  Try to recover thinking content from cache for a compacted turn.
1633
 
1634
+ Handles GEMINI format: extracts functionCall for cache key lookup,
1635
+ injects thinking as a part with thought: true.
1636
+
1637
  Returns True if thinking was successfully recovered and injected, False otherwise.
1638
  """
1639
  if turn_start_idx < 0 or turn_start_idx >= len(messages):
1640
  return False
1641
 
1642
  msg = messages[turn_start_idx]
1643
+ parts = msg.get("parts", [])
1644
 
1645
+ # Extract text content and build tool_calls structure for cache key lookup
1646
+ text_content = ""
1647
+ tool_calls = []
1648
+
1649
+ for part in parts:
1650
+ if isinstance(part, dict):
1651
+ if "text" in part and not part.get("thought"):
1652
+ text_content = part["text"]
1653
+ elif "functionCall" in part:
1654
+ fc = part["functionCall"]
1655
+ # Convert to OpenAI tool_calls format for cache key compatibility
1656
+ tool_calls.append(
1657
+ {
1658
+ "id": fc.get("id", ""),
1659
+ "type": "function",
1660
+ "function": {
1661
+ "name": fc.get("name", ""),
1662
+ "arguments": json.dumps(fc.get("args", {})),
1663
+ },
1664
+ }
1665
+ )
1666
 
1667
  # Generate cache key and try to retrieve
1668
  cache_key = self._generate_thinking_cache_key(text_content, tool_calls)
 
1687
  )
1688
  return False
1689
 
1690
+ # Inject the recovered thinking part at the beginning (Gemini format)
1691
+ thinking_part = {
1692
+ "text": thinking_text,
1693
+ "thought": True,
1694
+ "thoughtSignature": signature,
1695
  }
1696
 
1697
+ msg["parts"] = [thinking_part] + parts
 
 
 
 
 
1698
 
1699
  lib_logger.debug(
1700
  f"[Thinking Sanitization] Recovered thinking from cache: {len(thinking_text)} chars"
 
1714
  Close an incomplete tool loop by injecting synthetic messages to start a new turn.
1715
 
1716
  This is used when:
1717
+ - We're in a tool loop (conversation ends with functionResponse)
1718
  - The tool call was made WITHOUT thinking (e.g., by Gemini, non-thinking Claude, or compaction stripped it)
1719
  - We NOW want to enable thinking
1720
 
 
1723
  - "To toggle thinking, you must complete the assistant turn first"
1724
  - A non-tool-result user message ends the turn and allows a fresh start
1725
 
1726
+ Solution (GEMINI format):
1727
+ 1. Add synthetic MODEL message to complete the non-thinking turn
1728
  2. Add synthetic USER message to start a NEW turn
1729
  3. Claude will generate thinking for its response to the new turn
1730
 
 
1734
  # Strip any old thinking first
1735
  messages = self._strip_all_thinking_blocks(messages)
1736
 
1737
+ # Count tool results from the end of the conversation (Gemini format)
1738
+ tool_result_count = 0
1739
  for msg in reversed(messages):
1740
+ if msg.get("role") == "user":
1741
+ parts = msg.get("parts", [])
1742
+ has_function_response = any(
1743
+ isinstance(p, dict) and "functionResponse" in p for p in parts
1744
+ )
1745
+ if has_function_response:
1746
+ tool_result_count += len(
1747
+ [
1748
+ p
1749
+ for p in parts
1750
+ if isinstance(p, dict) and "functionResponse" in p
1751
+ ]
1752
+ )
1753
+ else:
1754
+ break # Real user message, stop counting
1755
+ elif msg.get("role") == "model":
1756
+ break # Stop at the model that made the tool calls
1757
 
1758
  # Safety check: if no tool results found, this shouldn't have been called
1759
  # But handle gracefully with a generic message
1760
+ if tool_result_count == 0:
1761
  lib_logger.warning(
1762
  "[Thinking Sanitization] _close_tool_loop_for_thinking called but no tool results found. "
1763
  "This may indicate malformed conversation history."
1764
  )
1765
+ synthetic_model_content = "[Processing previous context.]"
1766
+ elif tool_result_count == 1:
1767
+ synthetic_model_content = "[Tool execution completed.]"
1768
  else:
1769
+ synthetic_model_content = (
1770
+ f"[{tool_result_count} tool executions completed.]"
1771
  )
1772
 
1773
+ # Step 1: Inject synthetic MODEL message to complete the non-thinking turn (Gemini format)
1774
+ synthetic_model = {
1775
+ "role": "model",
1776
+ "parts": [{"text": synthetic_model_content}],
1777
  }
1778
+ messages.append(synthetic_model)
1779
 
1780
+ # Step 2: Inject synthetic USER message to start a NEW turn (Gemini format)
1781
  # This allows Claude to generate thinking for its response
1782
  # The message is minimal and unobtrusive - just triggers a new turn
1783
+ synthetic_user = {
1784
+ "role": "user",
1785
+ "parts": [{"text": "[Continue]"}],
1786
+ }
1787
  messages.append(synthetic_user)
1788
 
1789
  lib_logger.info(
1790
  f"[Thinking Sanitization] Closed tool loop with synthetic messages. "
1791
+ f"Model: '{synthetic_model_content}', User: '[Continue]'. "
1792
  f"Claude will now start a fresh turn with thinking enabled."
1793
  )
1794
 
 
3037
  reasoning_effort is not None and reasoning_effort != "disable"
3038
  )
3039
 
3040
+ # Transform messages to Gemini format FIRST
3041
+ # This restores thinking from cache if reasoning_content was stripped by client
3042
+ system_instruction, gemini_contents = self._transform_messages(messages, model)
3043
+ gemini_contents = self._fix_tool_response_grouping(gemini_contents)
3044
+
3045
+ # Sanitize thinking blocks for Claude AFTER transformation
3046
+ # Now we can see the full picture including cached thinking that was restored
3047
  # This handles: context compression, model switching, mid-turn thinking toggle
 
3048
  force_disable_thinking = False
3049
  if self._is_claude(model) and self._enable_thinking_sanitization:
3050
+ gemini_contents, force_disable_thinking = (
3051
+ self._sanitize_thinking_for_claude(gemini_contents, thinking_enabled)
3052
  )
3053
 
3054
  # If we're in a mid-turn thinking toggle situation, we MUST disable thinking
 
3057
  thinking_enabled = False
3058
  reasoning_effort = "disable" # Force disable for this request
3059
 
 
 
 
 
3060
  # Build payload
3061
  gemini_payload = {"contents": gemini_contents}
3062