Spaces:
Paused
refactor(antigravity): 🔨 migrate thinking sanitization to gemini message format
This commit refactors the Claude thinking sanitization logic to operate on Gemini-format messages after transformation, rather than before. This enables the sanitization to work with the full message context, including thinking blocks that were restored from cache during the transformation process.
Key changes:
- Move `_sanitize_thinking_for_claude` call to after `_transform_messages` instead of before
- Update all thinking detection and manipulation methods to work with Gemini format (role "model", "parts" array with "thought": true)
- Refactor `_analyze_turn_state` to detect tool results as user messages with "functionResponse" parts
- Update `_message_has_thinking` to check for "thought": true in parts array
- Add new `_message_has_tool_calls` helper for Gemini format detection
- Refactor `_strip_all_thinking_blocks` to filter parts with "thought": true
- Update `_strip_old_turn_thinking` and `_preserve_turn_start_thinking` for Gemini format
- Refactor `_looks_like_compacted_thinking_turn` to detect functionCall parts without thinking
- Update `_recover_thinking_from_cache` to inject thinking as Gemini-format part with "thought": true
- Refactor `_close_tool_loop_for_thinking` to use Gemini message structure
- Update all docstrings and comments to reflect "model" role instead of "assistant"
This change fixes issues where context compression or client-side stripping of reasoning_content would prevent proper thinking sanitization, as the sanitization now occurs after the transformation has restored thinking from cache.
|
@@ -1178,20 +1178,27 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1178 |
Returns:
|
| 1179 |
{
|
| 1180 |
"in_tool_loop": bool - True if we're in an incomplete tool use loop
|
| 1181 |
-
"turn_start_idx": int - Index of first
|
| 1182 |
"turn_has_thinking": bool - Whether the TURN started with thinking
|
| 1183 |
-
"
|
| 1184 |
-
"
|
| 1185 |
-
"
|
| 1186 |
-
"pending_tool_results": bool - Whether there are tool results after last
|
| 1187 |
"thinking_block_indices": List[int] - Indices of messages with thinking/reasoning
|
| 1188 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1189 |
"""
|
| 1190 |
state = {
|
| 1191 |
"in_tool_loop": False,
|
| 1192 |
"turn_start_idx": -1,
|
| 1193 |
"turn_has_thinking": False,
|
| 1194 |
-
"last_assistant_idx": -1,
|
| 1195 |
"last_assistant_has_thinking": False,
|
| 1196 |
"last_assistant_has_tool_calls": False,
|
| 1197 |
"pending_tool_results": False,
|
|
@@ -1199,25 +1206,16 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1199 |
}
|
| 1200 |
|
| 1201 |
# First pass: Find the last "real" user message (not a tool result)
|
| 1202 |
-
#
|
| 1203 |
last_real_user_idx = -1
|
| 1204 |
for i, msg in enumerate(messages):
|
| 1205 |
role = msg.get("role")
|
| 1206 |
if role == "user":
|
| 1207 |
-
# Check if this is a real user message or
|
| 1208 |
-
|
| 1209 |
-
|
| 1210 |
-
|
| 1211 |
-
|
| 1212 |
-
content = msg.get("content")
|
| 1213 |
-
|
| 1214 |
-
# If content is a list with tool_result items, it's a tool response
|
| 1215 |
-
is_tool_result_msg = False
|
| 1216 |
-
if isinstance(content, list):
|
| 1217 |
-
for item in content:
|
| 1218 |
-
if isinstance(item, dict) and item.get("type") == "tool_result":
|
| 1219 |
-
is_tool_result_msg = True
|
| 1220 |
-
break
|
| 1221 |
|
| 1222 |
if not is_tool_result_msg:
|
| 1223 |
last_real_user_idx = i
|
|
@@ -1226,52 +1224,71 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1226 |
for i, msg in enumerate(messages):
|
| 1227 |
role = msg.get("role")
|
| 1228 |
|
| 1229 |
-
if role == "
|
| 1230 |
-
# Check for thinking/reasoning content
|
| 1231 |
has_thinking = self._message_has_thinking(msg)
|
| 1232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1233 |
# Track if this is the turn start
|
| 1234 |
if i > last_real_user_idx and state["turn_start_idx"] == -1:
|
| 1235 |
state["turn_start_idx"] = i
|
| 1236 |
state["turn_has_thinking"] = has_thinking
|
| 1237 |
|
| 1238 |
state["last_assistant_idx"] = i
|
| 1239 |
-
state["last_assistant_has_tool_calls"] =
|
| 1240 |
state["last_assistant_has_thinking"] = has_thinking
|
| 1241 |
|
| 1242 |
if has_thinking:
|
| 1243 |
state["thinking_block_indices"].append(i)
|
| 1244 |
|
| 1245 |
-
elif role == "
|
| 1246 |
-
#
|
| 1247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1248 |
state["pending_tool_results"] = True
|
| 1249 |
|
| 1250 |
# We're in a tool loop if:
|
| 1251 |
# 1. There are pending tool results
|
| 1252 |
-
# 2. The conversation ends with tool results (last message is
|
| 1253 |
if state["pending_tool_results"] and messages:
|
| 1254 |
last_msg = messages[-1]
|
| 1255 |
-
if last_msg.get("role") == "
|
| 1256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1257 |
|
| 1258 |
return state
|
| 1259 |
|
| 1260 |
def _message_has_thinking(self, msg: Dict[str, Any]) -> bool:
|
| 1261 |
-
"""
|
| 1262 |
-
|
| 1263 |
-
if msg.get("reasoning_content"):
|
| 1264 |
-
return True
|
| 1265 |
-
|
| 1266 |
-
# Check for thinking in content array (some formats)
|
| 1267 |
-
content = msg.get("content")
|
| 1268 |
-
if isinstance(content, list):
|
| 1269 |
-
for item in content:
|
| 1270 |
-
if isinstance(item, dict) and item.get("type") == "thinking":
|
| 1271 |
-
return True
|
| 1272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1273 |
return False
|
| 1274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1275 |
def _sanitize_thinking_for_claude(
|
| 1276 |
self, messages: List[Dict[str, Any]], thinking_enabled: bool
|
| 1277 |
) -> Tuple[List[Dict[str, Any]], bool]:
|
|
@@ -1403,7 +1420,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1403 |
state["last_assistant_has_tool_calls"]
|
| 1404 |
and not state["turn_has_thinking"]
|
| 1405 |
):
|
| 1406 |
-
# The turn has
|
| 1407 |
# This could be:
|
| 1408 |
# 1. Compaction removed the thinking block
|
| 1409 |
# 2. The original call was made without thinking
|
|
@@ -1412,7 +1429,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1412 |
# For case 2, we let the model respond naturally.
|
| 1413 |
#
|
| 1414 |
# We can detect case 1 if there's evidence thinking was expected:
|
| 1415 |
-
# - The turn_start message has
|
| 1416 |
# - The content structure suggests a thinking block was stripped
|
| 1417 |
|
| 1418 |
# Check if turn_start has the hallmarks of a compacted thinking response
|
|
@@ -1436,18 +1453,21 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1436 |
messages, state["turn_start_idx"]
|
| 1437 |
), False
|
| 1438 |
else:
|
| 1439 |
-
# Can't recover - add synthetic user to start fresh turn
|
| 1440 |
lib_logger.info(
|
| 1441 |
"[Thinking Sanitization] Detected compacted turn missing thinking block. "
|
| 1442 |
"Adding synthetic user message to start fresh thinking turn."
|
| 1443 |
)
|
| 1444 |
# Add synthetic user message to trigger new turn with thinking
|
| 1445 |
-
synthetic_user = {
|
|
|
|
|
|
|
|
|
|
| 1446 |
messages.append(synthetic_user)
|
| 1447 |
return self._strip_all_thinking_blocks(messages), False
|
| 1448 |
else:
|
| 1449 |
lib_logger.debug(
|
| 1450 |
-
"[Thinking Sanitization] Last
|
| 1451 |
"This is likely from context compression or non-thinking model. "
|
| 1452 |
"New response will include thinking naturally."
|
| 1453 |
)
|
|
@@ -1460,75 +1480,80 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1460 |
def _strip_all_thinking_blocks(
|
| 1461 |
self, messages: List[Dict[str, Any]]
|
| 1462 |
) -> List[Dict[str, Any]]:
|
| 1463 |
-
"""
|
| 1464 |
-
|
| 1465 |
-
if msg.get("role") == "assistant":
|
| 1466 |
-
# Remove reasoning_content field
|
| 1467 |
-
msg.pop("reasoning_content", None)
|
| 1468 |
|
| 1469 |
-
|
| 1470 |
-
|
| 1471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1472 |
filtered = [
|
| 1473 |
-
|
| 1474 |
-
for
|
| 1475 |
-
if not (
|
| 1476 |
-
isinstance(item, dict) and item.get("type") == "thinking"
|
| 1477 |
-
)
|
| 1478 |
]
|
| 1479 |
-
|
| 1480 |
-
#
|
| 1481 |
-
|
|
|
|
|
|
|
|
|
|
| 1482 |
if not filtered:
|
| 1483 |
-
#
|
| 1484 |
-
if not
|
| 1485 |
-
msg["
|
| 1486 |
else:
|
| 1487 |
-
msg["
|
| 1488 |
-
None # tool_calls exist, content not needed
|
| 1489 |
-
)
|
| 1490 |
else:
|
| 1491 |
-
msg["
|
| 1492 |
return messages
|
| 1493 |
|
| 1494 |
def _strip_old_turn_thinking(
|
| 1495 |
-
self, messages: List[Dict[str, Any]],
|
| 1496 |
) -> List[Dict[str, Any]]:
|
| 1497 |
"""
|
| 1498 |
-
Strip thinking from old turns but preserve for the last
|
| 1499 |
|
| 1500 |
Per Claude docs: "thinking blocks from previous turns are removed from context"
|
| 1501 |
This mimics the API behavior and prevents issues.
|
|
|
|
|
|
|
| 1502 |
"""
|
| 1503 |
for i, msg in enumerate(messages):
|
| 1504 |
-
if msg.get("role") == "
|
| 1505 |
-
# Old turn - strip thinking
|
| 1506 |
-
msg.
|
| 1507 |
-
|
| 1508 |
-
if isinstance(content, list):
|
| 1509 |
filtered = [
|
| 1510 |
-
|
| 1511 |
-
for
|
| 1512 |
-
if not (
|
| 1513 |
-
isinstance(item, dict) and item.get("type") == "thinking"
|
| 1514 |
-
)
|
| 1515 |
]
|
| 1516 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1517 |
if not filtered:
|
| 1518 |
-
msg["
|
| 1519 |
else:
|
| 1520 |
-
msg["
|
| 1521 |
return messages
|
| 1522 |
|
| 1523 |
def _preserve_current_turn_thinking(
|
| 1524 |
-
self, messages: List[Dict[str, Any]],
|
| 1525 |
) -> List[Dict[str, Any]]:
|
| 1526 |
"""
|
| 1527 |
-
Preserve thinking only for the current (last)
|
| 1528 |
Strip from all previous turns.
|
| 1529 |
"""
|
| 1530 |
# Same as strip_old_turn_thinking - we keep the last turn intact
|
| 1531 |
-
return self._strip_old_turn_thinking(messages,
|
| 1532 |
|
| 1533 |
def _preserve_turn_start_thinking(
|
| 1534 |
self, messages: List[Dict[str, Any]], turn_start_idx: int
|
|
@@ -1536,65 +1561,66 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1536 |
"""
|
| 1537 |
Preserve thinking at the turn start message.
|
| 1538 |
|
| 1539 |
-
In multi-message tool loops, the thinking block is at the FIRST
|
| 1540 |
message of the turn (turn_start_idx), not the last one. We need to preserve
|
| 1541 |
thinking from the turn start, and strip it from all older turns.
|
|
|
|
|
|
|
| 1542 |
"""
|
| 1543 |
for i, msg in enumerate(messages):
|
| 1544 |
-
if msg.get("role") == "
|
| 1545 |
-
# Old turn - strip thinking
|
| 1546 |
-
msg.
|
| 1547 |
-
|
| 1548 |
-
if isinstance(content, list):
|
| 1549 |
filtered = [
|
| 1550 |
-
|
| 1551 |
-
for
|
| 1552 |
-
if not (
|
| 1553 |
-
isinstance(item, dict) and item.get("type") == "thinking"
|
| 1554 |
-
)
|
| 1555 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1556 |
if not filtered:
|
| 1557 |
-
msg["
|
| 1558 |
else:
|
| 1559 |
-
msg["
|
| 1560 |
return messages
|
| 1561 |
|
| 1562 |
def _looks_like_compacted_thinking_turn(self, msg: Dict[str, Any]) -> bool:
|
| 1563 |
"""
|
| 1564 |
Detect if a message looks like it was compacted from a thinking-enabled turn.
|
| 1565 |
|
| 1566 |
-
Heuristics:
|
| 1567 |
-
1. Has
|
| 1568 |
-
2.
|
| 1569 |
-
3. No text content before
|
| 1570 |
|
| 1571 |
This is imperfect but helps catch common compaction scenarios.
|
| 1572 |
"""
|
| 1573 |
-
|
|
|
|
| 1574 |
return False
|
| 1575 |
|
| 1576 |
-
|
|
|
|
|
|
|
| 1577 |
|
| 1578 |
-
|
| 1579 |
-
|
| 1580 |
-
has_text = any(
|
| 1581 |
-
isinstance(item, dict)
|
| 1582 |
-
and item.get("type") == "text"
|
| 1583 |
-
and item.get("text", "").strip()
|
| 1584 |
-
for item in content
|
| 1585 |
-
)
|
| 1586 |
-
has_tool_use = any(
|
| 1587 |
-
isinstance(item, dict) and item.get("type") == "tool_use"
|
| 1588 |
-
for item in content
|
| 1589 |
-
)
|
| 1590 |
|
| 1591 |
-
|
| 1592 |
-
|
| 1593 |
-
|
| 1594 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1595 |
|
| 1596 |
-
# If
|
| 1597 |
-
if not
|
| 1598 |
return True
|
| 1599 |
|
| 1600 |
return False
|
|
@@ -1605,17 +1631,38 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1605 |
"""
|
| 1606 |
Try to recover thinking content from cache for a compacted turn.
|
| 1607 |
|
|
|
|
|
|
|
|
|
|
| 1608 |
Returns True if thinking was successfully recovered and injected, False otherwise.
|
| 1609 |
"""
|
| 1610 |
if turn_start_idx < 0 or turn_start_idx >= len(messages):
|
| 1611 |
return False
|
| 1612 |
|
| 1613 |
msg = messages[turn_start_idx]
|
|
|
|
| 1614 |
|
| 1615 |
-
# Extract tool_calls for cache key lookup
|
| 1616 |
-
|
| 1617 |
-
|
| 1618 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1619 |
|
| 1620 |
# Generate cache key and try to retrieve
|
| 1621 |
cache_key = self._generate_thinking_cache_key(text_content, tool_calls)
|
|
@@ -1640,19 +1687,14 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1640 |
)
|
| 1641 |
return False
|
| 1642 |
|
| 1643 |
-
# Inject the recovered thinking
|
| 1644 |
-
|
| 1645 |
-
"
|
| 1646 |
-
"
|
| 1647 |
-
"
|
| 1648 |
}
|
| 1649 |
|
| 1650 |
-
|
| 1651 |
-
msg["content"] = [thinking_block] + content
|
| 1652 |
-
elif isinstance(content, str):
|
| 1653 |
-
msg["content"] = [thinking_block, {"type": "text", "text": content}]
|
| 1654 |
-
else:
|
| 1655 |
-
msg["content"] = [thinking_block]
|
| 1656 |
|
| 1657 |
lib_logger.debug(
|
| 1658 |
f"[Thinking Sanitization] Recovered thinking from cache: {len(thinking_text)} chars"
|
|
@@ -1672,7 +1714,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1672 |
Close an incomplete tool loop by injecting synthetic messages to start a new turn.
|
| 1673 |
|
| 1674 |
This is used when:
|
| 1675 |
-
- We're in a tool loop (conversation ends with
|
| 1676 |
- The tool call was made WITHOUT thinking (e.g., by Gemini, non-thinking Claude, or compaction stripped it)
|
| 1677 |
- We NOW want to enable thinking
|
| 1678 |
|
|
@@ -1681,8 +1723,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1681 |
- "To toggle thinking, you must complete the assistant turn first"
|
| 1682 |
- A non-tool-result user message ends the turn and allows a fresh start
|
| 1683 |
|
| 1684 |
-
Solution:
|
| 1685 |
-
1. Add synthetic
|
| 1686 |
2. Add synthetic USER message to start a NEW turn
|
| 1687 |
3. Claude will generate thinking for its response to the new turn
|
| 1688 |
|
|
@@ -1692,47 +1734,61 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1692 |
# Strip any old thinking first
|
| 1693 |
messages = self._strip_all_thinking_blocks(messages)
|
| 1694 |
|
| 1695 |
-
#
|
| 1696 |
-
|
| 1697 |
for msg in reversed(messages):
|
| 1698 |
-
if msg.get("role") == "
|
| 1699 |
-
|
| 1700 |
-
|
| 1701 |
-
|
| 1702 |
-
|
| 1703 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1704 |
|
| 1705 |
# Safety check: if no tool results found, this shouldn't have been called
|
| 1706 |
# But handle gracefully with a generic message
|
| 1707 |
-
if
|
| 1708 |
lib_logger.warning(
|
| 1709 |
"[Thinking Sanitization] _close_tool_loop_for_thinking called but no tool results found. "
|
| 1710 |
"This may indicate malformed conversation history."
|
| 1711 |
)
|
| 1712 |
-
|
| 1713 |
-
elif
|
| 1714 |
-
|
| 1715 |
else:
|
| 1716 |
-
|
| 1717 |
-
f"[{
|
| 1718 |
)
|
| 1719 |
|
| 1720 |
-
# Step 1: Inject synthetic
|
| 1721 |
-
|
| 1722 |
-
"role": "
|
| 1723 |
-
"
|
| 1724 |
}
|
| 1725 |
-
messages.append(
|
| 1726 |
|
| 1727 |
-
# Step 2: Inject synthetic USER message to start a NEW turn
|
| 1728 |
# This allows Claude to generate thinking for its response
|
| 1729 |
# The message is minimal and unobtrusive - just triggers a new turn
|
| 1730 |
-
synthetic_user = {
|
|
|
|
|
|
|
|
|
|
| 1731 |
messages.append(synthetic_user)
|
| 1732 |
|
| 1733 |
lib_logger.info(
|
| 1734 |
f"[Thinking Sanitization] Closed tool loop with synthetic messages. "
|
| 1735 |
-
f"
|
| 1736 |
f"Claude will now start a fresh turn with thinking enabled."
|
| 1737 |
)
|
| 1738 |
|
|
@@ -2981,13 +3037,18 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 2981 |
reasoning_effort is not None and reasoning_effort != "disable"
|
| 2982 |
)
|
| 2983 |
|
| 2984 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2985 |
# This handles: context compression, model switching, mid-turn thinking toggle
|
| 2986 |
-
# Returns (sanitized_messages, force_disable_thinking)
|
| 2987 |
force_disable_thinking = False
|
| 2988 |
if self._is_claude(model) and self._enable_thinking_sanitization:
|
| 2989 |
-
|
| 2990 |
-
|
| 2991 |
)
|
| 2992 |
|
| 2993 |
# If we're in a mid-turn thinking toggle situation, we MUST disable thinking
|
|
@@ -2996,10 +3057,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 2996 |
thinking_enabled = False
|
| 2997 |
reasoning_effort = "disable" # Force disable for this request
|
| 2998 |
|
| 2999 |
-
# Transform messages
|
| 3000 |
-
system_instruction, gemini_contents = self._transform_messages(messages, model)
|
| 3001 |
-
gemini_contents = self._fix_tool_response_grouping(gemini_contents)
|
| 3002 |
-
|
| 3003 |
# Build payload
|
| 3004 |
gemini_payload = {"contents": gemini_contents}
|
| 3005 |
|
|
|
|
| 1178 |
Returns:
|
| 1179 |
{
|
| 1180 |
"in_tool_loop": bool - True if we're in an incomplete tool use loop
|
| 1181 |
+
"turn_start_idx": int - Index of first model message in current turn
|
| 1182 |
"turn_has_thinking": bool - Whether the TURN started with thinking
|
| 1183 |
+
"last_model_idx": int - Index of last model message
|
| 1184 |
+
"last_model_has_thinking": bool - Whether last model msg has thinking
|
| 1185 |
+
"last_model_has_tool_calls": bool - Whether last model msg has tool calls
|
| 1186 |
+
"pending_tool_results": bool - Whether there are tool results after last model
|
| 1187 |
"thinking_block_indices": List[int] - Indices of messages with thinking/reasoning
|
| 1188 |
}
|
| 1189 |
+
|
| 1190 |
+
NOTE: This now operates on Gemini-format messages (after transformation):
|
| 1191 |
+
- Role "model" instead of "assistant"
|
| 1192 |
+
- Role "user" for both user messages AND tool results (with functionResponse)
|
| 1193 |
+
- "parts" array with "thought": true for thinking
|
| 1194 |
+
- "parts" array with "functionCall" for tool calls
|
| 1195 |
+
- "parts" array with "functionResponse" for tool results
|
| 1196 |
"""
|
| 1197 |
state = {
|
| 1198 |
"in_tool_loop": False,
|
| 1199 |
"turn_start_idx": -1,
|
| 1200 |
"turn_has_thinking": False,
|
| 1201 |
+
"last_assistant_idx": -1, # Keep name for compatibility
|
| 1202 |
"last_assistant_has_thinking": False,
|
| 1203 |
"last_assistant_has_tool_calls": False,
|
| 1204 |
"pending_tool_results": False,
|
|
|
|
| 1206 |
}
|
| 1207 |
|
| 1208 |
# First pass: Find the last "real" user message (not a tool result)
|
| 1209 |
+
# In Gemini format, tool results are "user" role with functionResponse parts
|
| 1210 |
last_real_user_idx = -1
|
| 1211 |
for i, msg in enumerate(messages):
|
| 1212 |
role = msg.get("role")
|
| 1213 |
if role == "user":
|
| 1214 |
+
# Check if this is a real user message or a tool result container
|
| 1215 |
+
parts = msg.get("parts", [])
|
| 1216 |
+
is_tool_result_msg = any(
|
| 1217 |
+
isinstance(p, dict) and "functionResponse" in p for p in parts
|
| 1218 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1219 |
|
| 1220 |
if not is_tool_result_msg:
|
| 1221 |
last_real_user_idx = i
|
|
|
|
| 1224 |
for i, msg in enumerate(messages):
|
| 1225 |
role = msg.get("role")
|
| 1226 |
|
| 1227 |
+
if role == "model":
|
| 1228 |
+
# Check for thinking/reasoning content (Gemini format)
|
| 1229 |
has_thinking = self._message_has_thinking(msg)
|
| 1230 |
|
| 1231 |
+
# Check for tool calls (functionCall in parts)
|
| 1232 |
+
parts = msg.get("parts", [])
|
| 1233 |
+
has_tool_calls = any(
|
| 1234 |
+
isinstance(p, dict) and "functionCall" in p for p in parts
|
| 1235 |
+
)
|
| 1236 |
+
|
| 1237 |
# Track if this is the turn start
|
| 1238 |
if i > last_real_user_idx and state["turn_start_idx"] == -1:
|
| 1239 |
state["turn_start_idx"] = i
|
| 1240 |
state["turn_has_thinking"] = has_thinking
|
| 1241 |
|
| 1242 |
state["last_assistant_idx"] = i
|
| 1243 |
+
state["last_assistant_has_tool_calls"] = has_tool_calls
|
| 1244 |
state["last_assistant_has_thinking"] = has_thinking
|
| 1245 |
|
| 1246 |
if has_thinking:
|
| 1247 |
state["thinking_block_indices"].append(i)
|
| 1248 |
|
| 1249 |
+
elif role == "user":
|
| 1250 |
+
# Check if this is a tool result (functionResponse in parts)
|
| 1251 |
+
parts = msg.get("parts", [])
|
| 1252 |
+
is_tool_result = any(
|
| 1253 |
+
isinstance(p, dict) and "functionResponse" in p for p in parts
|
| 1254 |
+
)
|
| 1255 |
+
|
| 1256 |
+
if is_tool_result and state["last_assistant_has_tool_calls"]:
|
| 1257 |
state["pending_tool_results"] = True
|
| 1258 |
|
| 1259 |
# We're in a tool loop if:
|
| 1260 |
# 1. There are pending tool results
|
| 1261 |
+
# 2. The conversation ends with tool results (last message is user with functionResponse)
|
| 1262 |
if state["pending_tool_results"] and messages:
|
| 1263 |
last_msg = messages[-1]
|
| 1264 |
+
if last_msg.get("role") == "user":
|
| 1265 |
+
parts = last_msg.get("parts", [])
|
| 1266 |
+
ends_with_tool_result = any(
|
| 1267 |
+
isinstance(p, dict) and "functionResponse" in p for p in parts
|
| 1268 |
+
)
|
| 1269 |
+
if ends_with_tool_result:
|
| 1270 |
+
state["in_tool_loop"] = True
|
| 1271 |
|
| 1272 |
return state
|
| 1273 |
|
| 1274 |
def _message_has_thinking(self, msg: Dict[str, Any]) -> bool:
|
| 1275 |
+
"""
|
| 1276 |
+
Check if a message contains thinking/reasoning content.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1277 |
|
| 1278 |
+
Handles GEMINI format (after transformation):
|
| 1279 |
+
- "parts" array with items having "thought": true
|
| 1280 |
+
"""
|
| 1281 |
+
parts = msg.get("parts", [])
|
| 1282 |
+
for part in parts:
|
| 1283 |
+
if isinstance(part, dict) and part.get("thought") is True:
|
| 1284 |
+
return True
|
| 1285 |
return False
|
| 1286 |
|
| 1287 |
+
def _message_has_tool_calls(self, msg: Dict[str, Any]) -> bool:
|
| 1288 |
+
"""Check if a message contains tool calls (Gemini format)."""
|
| 1289 |
+
parts = msg.get("parts", [])
|
| 1290 |
+
return any(isinstance(p, dict) and "functionCall" in p for p in parts)
|
| 1291 |
+
|
| 1292 |
def _sanitize_thinking_for_claude(
|
| 1293 |
self, messages: List[Dict[str, Any]], thinking_enabled: bool
|
| 1294 |
) -> Tuple[List[Dict[str, Any]], bool]:
|
|
|
|
| 1420 |
state["last_assistant_has_tool_calls"]
|
| 1421 |
and not state["turn_has_thinking"]
|
| 1422 |
):
|
| 1423 |
+
# The turn has functionCall but no thinking at turn start.
|
| 1424 |
# This could be:
|
| 1425 |
# 1. Compaction removed the thinking block
|
| 1426 |
# 2. The original call was made without thinking
|
|
|
|
| 1429 |
# For case 2, we let the model respond naturally.
|
| 1430 |
#
|
| 1431 |
# We can detect case 1 if there's evidence thinking was expected:
|
| 1432 |
+
# - The turn_start message has functionCall (typical thinking-enabled flow)
|
| 1433 |
# - The content structure suggests a thinking block was stripped
|
| 1434 |
|
| 1435 |
# Check if turn_start has the hallmarks of a compacted thinking response
|
|
|
|
| 1453 |
messages, state["turn_start_idx"]
|
| 1454 |
), False
|
| 1455 |
else:
|
| 1456 |
+
# Can't recover - add synthetic user to start fresh turn (Gemini format)
|
| 1457 |
lib_logger.info(
|
| 1458 |
"[Thinking Sanitization] Detected compacted turn missing thinking block. "
|
| 1459 |
"Adding synthetic user message to start fresh thinking turn."
|
| 1460 |
)
|
| 1461 |
# Add synthetic user message to trigger new turn with thinking
|
| 1462 |
+
synthetic_user = {
|
| 1463 |
+
"role": "user",
|
| 1464 |
+
"parts": [{"text": "[Continue]"}],
|
| 1465 |
+
}
|
| 1466 |
messages.append(synthetic_user)
|
| 1467 |
return self._strip_all_thinking_blocks(messages), False
|
| 1468 |
else:
|
| 1469 |
lib_logger.debug(
|
| 1470 |
+
"[Thinking Sanitization] Last model has functionCall but no thinking. "
|
| 1471 |
"This is likely from context compression or non-thinking model. "
|
| 1472 |
"New response will include thinking naturally."
|
| 1473 |
)
|
|
|
|
| 1480 |
def _strip_all_thinking_blocks(
|
| 1481 |
self, messages: List[Dict[str, Any]]
|
| 1482 |
) -> List[Dict[str, Any]]:
|
| 1483 |
+
"""
|
| 1484 |
+
Remove all thinking/reasoning content from messages.
|
|
|
|
|
|
|
|
|
|
| 1485 |
|
| 1486 |
+
Handles GEMINI format (after transformation):
|
| 1487 |
+
- Role "model" instead of "assistant"
|
| 1488 |
+
- "parts" array with "thought": true for thinking
|
| 1489 |
+
"""
|
| 1490 |
+
for msg in messages:
|
| 1491 |
+
if msg.get("role") == "model":
|
| 1492 |
+
parts = msg.get("parts", [])
|
| 1493 |
+
if parts:
|
| 1494 |
+
# Filter out thinking parts (those with "thought": true)
|
| 1495 |
filtered = [
|
| 1496 |
+
p
|
| 1497 |
+
for p in parts
|
| 1498 |
+
if not (isinstance(p, dict) and p.get("thought") is True)
|
|
|
|
|
|
|
| 1499 |
]
|
| 1500 |
+
|
| 1501 |
+
# Check if there are still functionCalls remaining
|
| 1502 |
+
has_function_calls = any(
|
| 1503 |
+
isinstance(p, dict) and "functionCall" in p for p in filtered
|
| 1504 |
+
)
|
| 1505 |
+
|
| 1506 |
if not filtered:
|
| 1507 |
+
# All parts were thinking - need placeholder for valid structure
|
| 1508 |
+
if not has_function_calls:
|
| 1509 |
+
msg["parts"] = [{"text": ""}]
|
| 1510 |
else:
|
| 1511 |
+
msg["parts"] = [] # Will be invalid, but shouldn't happen
|
|
|
|
|
|
|
| 1512 |
else:
|
| 1513 |
+
msg["parts"] = filtered
|
| 1514 |
return messages
|
| 1515 |
|
| 1516 |
def _strip_old_turn_thinking(
|
| 1517 |
+
self, messages: List[Dict[str, Any]], last_model_idx: int
|
| 1518 |
) -> List[Dict[str, Any]]:
|
| 1519 |
"""
|
| 1520 |
+
Strip thinking from old turns but preserve for the last model turn.
|
| 1521 |
|
| 1522 |
Per Claude docs: "thinking blocks from previous turns are removed from context"
|
| 1523 |
This mimics the API behavior and prevents issues.
|
| 1524 |
+
|
| 1525 |
+
Handles GEMINI format: role "model", "parts" with "thought": true
|
| 1526 |
"""
|
| 1527 |
for i, msg in enumerate(messages):
|
| 1528 |
+
if msg.get("role") == "model" and i < last_model_idx:
|
| 1529 |
+
# Old turn - strip thinking parts
|
| 1530 |
+
parts = msg.get("parts", [])
|
| 1531 |
+
if parts:
|
|
|
|
| 1532 |
filtered = [
|
| 1533 |
+
p
|
| 1534 |
+
for p in parts
|
| 1535 |
+
if not (isinstance(p, dict) and p.get("thought") is True)
|
|
|
|
|
|
|
| 1536 |
]
|
| 1537 |
+
|
| 1538 |
+
has_function_calls = any(
|
| 1539 |
+
isinstance(p, dict) and "functionCall" in p for p in filtered
|
| 1540 |
+
)
|
| 1541 |
+
|
| 1542 |
if not filtered:
|
| 1543 |
+
msg["parts"] = [{"text": ""}] if not has_function_calls else []
|
| 1544 |
else:
|
| 1545 |
+
msg["parts"] = filtered
|
| 1546 |
return messages
|
| 1547 |
|
| 1548 |
def _preserve_current_turn_thinking(
|
| 1549 |
+
self, messages: List[Dict[str, Any]], last_model_idx: int
|
| 1550 |
) -> List[Dict[str, Any]]:
|
| 1551 |
"""
|
| 1552 |
+
Preserve thinking only for the current (last) model turn.
|
| 1553 |
Strip from all previous turns.
|
| 1554 |
"""
|
| 1555 |
# Same as strip_old_turn_thinking - we keep the last turn intact
|
| 1556 |
+
return self._strip_old_turn_thinking(messages, last_model_idx)
|
| 1557 |
|
| 1558 |
def _preserve_turn_start_thinking(
|
| 1559 |
self, messages: List[Dict[str, Any]], turn_start_idx: int
|
|
|
|
| 1561 |
"""
|
| 1562 |
Preserve thinking at the turn start message.
|
| 1563 |
|
| 1564 |
+
In multi-message tool loops, the thinking block is at the FIRST model
|
| 1565 |
message of the turn (turn_start_idx), not the last one. We need to preserve
|
| 1566 |
thinking from the turn start, and strip it from all older turns.
|
| 1567 |
+
|
| 1568 |
+
Handles GEMINI format: role "model", "parts" with "thought": true
|
| 1569 |
"""
|
| 1570 |
for i, msg in enumerate(messages):
|
| 1571 |
+
if msg.get("role") == "model" and i < turn_start_idx:
|
| 1572 |
+
# Old turn - strip thinking parts
|
| 1573 |
+
parts = msg.get("parts", [])
|
| 1574 |
+
if parts:
|
|
|
|
| 1575 |
filtered = [
|
| 1576 |
+
p
|
| 1577 |
+
for p in parts
|
| 1578 |
+
if not (isinstance(p, dict) and p.get("thought") is True)
|
|
|
|
|
|
|
| 1579 |
]
|
| 1580 |
+
|
| 1581 |
+
has_function_calls = any(
|
| 1582 |
+
isinstance(p, dict) and "functionCall" in p for p in filtered
|
| 1583 |
+
)
|
| 1584 |
+
|
| 1585 |
if not filtered:
|
| 1586 |
+
msg["parts"] = [{"text": ""}] if not has_function_calls else []
|
| 1587 |
else:
|
| 1588 |
+
msg["parts"] = filtered
|
| 1589 |
return messages
|
| 1590 |
|
| 1591 |
def _looks_like_compacted_thinking_turn(self, msg: Dict[str, Any]) -> bool:
|
| 1592 |
"""
|
| 1593 |
Detect if a message looks like it was compacted from a thinking-enabled turn.
|
| 1594 |
|
| 1595 |
+
Heuristics (GEMINI format):
|
| 1596 |
+
1. Has functionCall parts (typical thinking flow produces tool calls)
|
| 1597 |
+
2. No thinking parts (thought: true)
|
| 1598 |
+
3. No text content before functionCall (thinking responses usually have text)
|
| 1599 |
|
| 1600 |
This is imperfect but helps catch common compaction scenarios.
|
| 1601 |
"""
|
| 1602 |
+
parts = msg.get("parts", [])
|
| 1603 |
+
if not parts:
|
| 1604 |
return False
|
| 1605 |
|
| 1606 |
+
has_function_call = any(
|
| 1607 |
+
isinstance(p, dict) and "functionCall" in p for p in parts
|
| 1608 |
+
)
|
| 1609 |
|
| 1610 |
+
if not has_function_call:
|
| 1611 |
+
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1612 |
|
| 1613 |
+
# Check for text content (not thinking)
|
| 1614 |
+
has_text = any(
|
| 1615 |
+
isinstance(p, dict)
|
| 1616 |
+
and "text" in p
|
| 1617 |
+
and p.get("text", "").strip()
|
| 1618 |
+
and not p.get("thought") # Exclude thinking text
|
| 1619 |
+
for p in parts
|
| 1620 |
+
)
|
| 1621 |
|
| 1622 |
+
# If we have functionCall but no non-thinking text, likely compacted
|
| 1623 |
+
if not has_text:
|
| 1624 |
return True
|
| 1625 |
|
| 1626 |
return False
|
|
|
|
| 1631 |
"""
|
| 1632 |
Try to recover thinking content from cache for a compacted turn.
|
| 1633 |
|
| 1634 |
+
Handles GEMINI format: extracts functionCall for cache key lookup,
|
| 1635 |
+
injects thinking as a part with thought: true.
|
| 1636 |
+
|
| 1637 |
Returns True if thinking was successfully recovered and injected, False otherwise.
|
| 1638 |
"""
|
| 1639 |
if turn_start_idx < 0 or turn_start_idx >= len(messages):
|
| 1640 |
return False
|
| 1641 |
|
| 1642 |
msg = messages[turn_start_idx]
|
| 1643 |
+
parts = msg.get("parts", [])
|
| 1644 |
|
| 1645 |
+
# Extract text content and build tool_calls structure for cache key lookup
|
| 1646 |
+
text_content = ""
|
| 1647 |
+
tool_calls = []
|
| 1648 |
+
|
| 1649 |
+
for part in parts:
|
| 1650 |
+
if isinstance(part, dict):
|
| 1651 |
+
if "text" in part and not part.get("thought"):
|
| 1652 |
+
text_content = part["text"]
|
| 1653 |
+
elif "functionCall" in part:
|
| 1654 |
+
fc = part["functionCall"]
|
| 1655 |
+
# Convert to OpenAI tool_calls format for cache key compatibility
|
| 1656 |
+
tool_calls.append(
|
| 1657 |
+
{
|
| 1658 |
+
"id": fc.get("id", ""),
|
| 1659 |
+
"type": "function",
|
| 1660 |
+
"function": {
|
| 1661 |
+
"name": fc.get("name", ""),
|
| 1662 |
+
"arguments": json.dumps(fc.get("args", {})),
|
| 1663 |
+
},
|
| 1664 |
+
}
|
| 1665 |
+
)
|
| 1666 |
|
| 1667 |
# Generate cache key and try to retrieve
|
| 1668 |
cache_key = self._generate_thinking_cache_key(text_content, tool_calls)
|
|
|
|
| 1687 |
)
|
| 1688 |
return False
|
| 1689 |
|
| 1690 |
+
# Inject the recovered thinking part at the beginning (Gemini format)
|
| 1691 |
+
thinking_part = {
|
| 1692 |
+
"text": thinking_text,
|
| 1693 |
+
"thought": True,
|
| 1694 |
+
"thoughtSignature": signature,
|
| 1695 |
}
|
| 1696 |
|
| 1697 |
+
msg["parts"] = [thinking_part] + parts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1698 |
|
| 1699 |
lib_logger.debug(
|
| 1700 |
f"[Thinking Sanitization] Recovered thinking from cache: {len(thinking_text)} chars"
|
|
|
|
| 1714 |
Close an incomplete tool loop by injecting synthetic messages to start a new turn.
|
| 1715 |
|
| 1716 |
This is used when:
|
| 1717 |
+
- We're in a tool loop (conversation ends with functionResponse)
|
| 1718 |
- The tool call was made WITHOUT thinking (e.g., by Gemini, non-thinking Claude, or compaction stripped it)
|
| 1719 |
- We NOW want to enable thinking
|
| 1720 |
|
|
|
|
| 1723 |
- "To toggle thinking, you must complete the assistant turn first"
|
| 1724 |
- A non-tool-result user message ends the turn and allows a fresh start
|
| 1725 |
|
| 1726 |
+
Solution (GEMINI format):
|
| 1727 |
+
1. Add synthetic MODEL message to complete the non-thinking turn
|
| 1728 |
2. Add synthetic USER message to start a NEW turn
|
| 1729 |
3. Claude will generate thinking for its response to the new turn
|
| 1730 |
|
|
|
|
| 1734 |
# Strip any old thinking first
|
| 1735 |
messages = self._strip_all_thinking_blocks(messages)
|
| 1736 |
|
| 1737 |
+
# Count tool results from the end of the conversation (Gemini format)
|
| 1738 |
+
tool_result_count = 0
|
| 1739 |
for msg in reversed(messages):
|
| 1740 |
+
if msg.get("role") == "user":
|
| 1741 |
+
parts = msg.get("parts", [])
|
| 1742 |
+
has_function_response = any(
|
| 1743 |
+
isinstance(p, dict) and "functionResponse" in p for p in parts
|
| 1744 |
+
)
|
| 1745 |
+
if has_function_response:
|
| 1746 |
+
tool_result_count += len(
|
| 1747 |
+
[
|
| 1748 |
+
p
|
| 1749 |
+
for p in parts
|
| 1750 |
+
if isinstance(p, dict) and "functionResponse" in p
|
| 1751 |
+
]
|
| 1752 |
+
)
|
| 1753 |
+
else:
|
| 1754 |
+
break # Real user message, stop counting
|
| 1755 |
+
elif msg.get("role") == "model":
|
| 1756 |
+
break # Stop at the model that made the tool calls
|
| 1757 |
|
| 1758 |
# Safety check: if no tool results found, this shouldn't have been called
|
| 1759 |
# But handle gracefully with a generic message
|
| 1760 |
+
if tool_result_count == 0:
|
| 1761 |
lib_logger.warning(
|
| 1762 |
"[Thinking Sanitization] _close_tool_loop_for_thinking called but no tool results found. "
|
| 1763 |
"This may indicate malformed conversation history."
|
| 1764 |
)
|
| 1765 |
+
synthetic_model_content = "[Processing previous context.]"
|
| 1766 |
+
elif tool_result_count == 1:
|
| 1767 |
+
synthetic_model_content = "[Tool execution completed.]"
|
| 1768 |
else:
|
| 1769 |
+
synthetic_model_content = (
|
| 1770 |
+
f"[{tool_result_count} tool executions completed.]"
|
| 1771 |
)
|
| 1772 |
|
| 1773 |
+
# Step 1: Inject synthetic MODEL message to complete the non-thinking turn (Gemini format)
|
| 1774 |
+
synthetic_model = {
|
| 1775 |
+
"role": "model",
|
| 1776 |
+
"parts": [{"text": synthetic_model_content}],
|
| 1777 |
}
|
| 1778 |
+
messages.append(synthetic_model)
|
| 1779 |
|
| 1780 |
+
# Step 2: Inject synthetic USER message to start a NEW turn (Gemini format)
|
| 1781 |
# This allows Claude to generate thinking for its response
|
| 1782 |
# The message is minimal and unobtrusive - just triggers a new turn
|
| 1783 |
+
synthetic_user = {
|
| 1784 |
+
"role": "user",
|
| 1785 |
+
"parts": [{"text": "[Continue]"}],
|
| 1786 |
+
}
|
| 1787 |
messages.append(synthetic_user)
|
| 1788 |
|
| 1789 |
lib_logger.info(
|
| 1790 |
f"[Thinking Sanitization] Closed tool loop with synthetic messages. "
|
| 1791 |
+
f"Model: '{synthetic_model_content}', User: '[Continue]'. "
|
| 1792 |
f"Claude will now start a fresh turn with thinking enabled."
|
| 1793 |
)
|
| 1794 |
|
|
|
|
| 3037 |
reasoning_effort is not None and reasoning_effort != "disable"
|
| 3038 |
)
|
| 3039 |
|
| 3040 |
+
# Transform messages to Gemini format FIRST
|
| 3041 |
+
# This restores thinking from cache if reasoning_content was stripped by client
|
| 3042 |
+
system_instruction, gemini_contents = self._transform_messages(messages, model)
|
| 3043 |
+
gemini_contents = self._fix_tool_response_grouping(gemini_contents)
|
| 3044 |
+
|
| 3045 |
+
# Sanitize thinking blocks for Claude AFTER transformation
|
| 3046 |
+
# Now we can see the full picture including cached thinking that was restored
|
| 3047 |
# This handles: context compression, model switching, mid-turn thinking toggle
|
|
|
|
| 3048 |
force_disable_thinking = False
|
| 3049 |
if self._is_claude(model) and self._enable_thinking_sanitization:
|
| 3050 |
+
gemini_contents, force_disable_thinking = (
|
| 3051 |
+
self._sanitize_thinking_for_claude(gemini_contents, thinking_enabled)
|
| 3052 |
)
|
| 3053 |
|
| 3054 |
# If we're in a mid-turn thinking toggle situation, we MUST disable thinking
|
|
|
|
| 3057 |
thinking_enabled = False
|
| 3058 |
reasoning_effort = "disable" # Force disable for this request
|
| 3059 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3060 |
# Build payload
|
| 3061 |
gemini_payload = {"contents": gemini_contents}
|
| 3062 |
|