Spaces:
Paused
feat(antigravity): ✨ add thinking mode toggling for mid-conversation model switches
Browse files
This commit introduces intelligent handling of Claude's thinking mode when switching models mid-conversation during incomplete tool use loops.
**New Features:**
- Auto-detection of incomplete tool turns (when messages end with tool results without assistant completion)
- Configurable turn completion injection via `ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION` (default: true)
- Configurable thinking mode suppression via `ANTIGRAVITY_AUTO_SUPPRESS_THINKING` (default: false)
- Customizable turn completion placeholder text via `ANTIGRAVITY_TURN_COMPLETION_TEXT` (default: "...")
**Implementation Details:**
- `_detect_incomplete_tool_turn()`: Analyzes message history to identify incomplete tool use patterns
- `_inject_turn_completion()`: Appends a synthetic assistant message to close incomplete turns
- `_handle_thinking_mode_toggle()`: Orchestrates the toggling strategy based on configuration
**Behavior:**
When switching to Claude with thinking mode enabled during an incomplete tool loop:
1. If auto-injection is enabled: Inject a completion message to allow thinking mode
2. If auto-suppression is enabled: Disable thinking mode to prevent API errors
3. If both are disabled: Allow the request to proceed (likely resulting in an API error)
This resolves API compatibility issues when transitioning between models with different conversation state requirements.
|
@@ -331,6 +331,11 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 331 |
self._enable_dynamic_models = _env_bool("ANTIGRAVITY_ENABLE_DYNAMIC_MODELS", False)
|
| 332 |
self._enable_gemini3_tool_fix = _env_bool("ANTIGRAVITY_GEMINI3_TOOL_FIX", True)
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
# Gemini 3 tool fix configuration
|
| 335 |
self._gemini3_tool_prefix = os.getenv("ANTIGRAVITY_GEMINI3_TOOL_PREFIX", "gemini3_")
|
| 336 |
self._gemini3_description_prompt = os.getenv(
|
|
@@ -1324,6 +1329,142 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1324 |
|
| 1325 |
return [f"antigravity/{m}" for m in AVAILABLE_MODELS]
|
| 1326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1327 |
async def acompletion(
|
| 1328 |
self,
|
| 1329 |
client: httpx.AsyncClient,
|
|
@@ -1353,6 +1494,13 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
|
|
| 1353 |
# Create logger
|
| 1354 |
file_logger = AntigravityFileLogger(model, enable_logging)
|
| 1355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1356 |
# Transform messages
|
| 1357 |
system_instruction, gemini_contents = self._transform_messages(messages, model)
|
| 1358 |
gemini_contents = self._fix_tool_response_grouping(gemini_contents)
|
|
|
|
| 331 |
self._enable_dynamic_models = _env_bool("ANTIGRAVITY_ENABLE_DYNAMIC_MODELS", False)
|
| 332 |
self._enable_gemini3_tool_fix = _env_bool("ANTIGRAVITY_GEMINI3_TOOL_FIX", True)
|
| 333 |
|
| 334 |
+
# Thinking mode toggling behavior
|
| 335 |
+
self._auto_inject_turn_completion = _env_bool("ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION", True)
|
| 336 |
+
self._auto_suppress_thinking = _env_bool("ANTIGRAVITY_AUTO_SUPPRESS_THINKING", False)
|
| 337 |
+
self._turn_completion_placeholder = os.getenv("ANTIGRAVITY_TURN_COMPLETION_TEXT", "...")
|
| 338 |
+
|
| 339 |
# Gemini 3 tool fix configuration
|
| 340 |
self._gemini3_tool_prefix = os.getenv("ANTIGRAVITY_GEMINI3_TOOL_PREFIX", "gemini3_")
|
| 341 |
self._gemini3_description_prompt = os.getenv(
|
|
|
|
| 1329 |
|
| 1330 |
return [f"antigravity/{m}" for m in AVAILABLE_MODELS]
|
| 1331 |
|
| 1332 |
+
# =========================================================================
|
| 1333 |
+
# THINKING MODE TOGGLING HELPERS
|
| 1334 |
+
# =========================================================================
|
| 1335 |
+
|
| 1336 |
+
def _detect_incomplete_tool_turn(self, messages: List[Dict[str, Any]]) -> Optional[int]:
    """
    Detect whether the conversation ends inside an unfinished tool-use loop.

    A turn is considered incomplete when the last message has role "tool"
    and the nearest preceding assistant message carries ``tool_calls``
    (i.e. no plain assistant reply has closed the turn yet).

    Args:
        messages: Chat messages as dicts with at least a "role" key.
            An empty list or ``None`` is treated as "nothing to detect".

    Returns:
        Index of the assistant message with ``tool_calls`` that opened the
        unfinished turn, or ``None`` when no incomplete turn is found.
    """
    # An incomplete turn needs at least the assistant call and its result.
    if not messages or len(messages) < 2:
        return None

    # Only a trailing tool result can leave a turn open.
    if messages[-1].get("role") != "tool":
        return None

    # Walk backwards from the message just before the trailing tool result.
    # The first assistant message met this way is the one closest to the
    # end, so nothing after it can be an assistant message; no additional
    # forward scan for a "completing" reply is needed.
    for i in range(len(messages) - 2, -1, -1):
        msg = messages[i]
        if msg.get("role") != "assistant":
            continue

        if not msg.get("tool_calls"):
            # Nearest assistant message is a plain reply: the turn is closed.
            return None

        lib_logger.debug(
            f"Detected incomplete tool turn: assistant message at index {i} "
            "has tool_calls, but no completing text response found"
        )
        return i

    # No assistant message precedes the tool result at all.
    return None
|
| 1377 |
+
|
| 1378 |
+
def _inject_turn_completion(
    self,
    messages: List[Dict[str, Any]],
    incomplete_turn_index: int
) -> List[Dict[str, Any]]:
    """
    Close an unfinished tool-use turn with a synthetic assistant message.

    The input list is left untouched; a new list is returned whose final
    element is an assistant message containing the configured placeholder
    text (``self._turn_completion_placeholder``).

    Args:
        messages: Original message list.
        incomplete_turn_index: Index of the assistant message with
            tool_calls that opened the turn; used only in the log line.

    Returns:
        A shallow copy of ``messages`` with the completion message appended.
    """
    placeholder = self._turn_completion_placeholder

    # Build a new list rather than mutating the caller's messages.
    closed_conversation = [
        *messages,
        {"role": "assistant", "content": placeholder},
    ]

    lib_logger.info(
        f"Injected turn-completing assistant message ('{placeholder}') "
        f"to enable thinking mode. Original tool use started at message index {incomplete_turn_index}."
    )

    return closed_conversation
|
| 1408 |
+
|
| 1409 |
+
def _handle_thinking_mode_toggle(
    self,
    messages: List[Dict[str, Any]],
    model: str,
    reasoning_effort: Optional[str]
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
    """
    Decide how to treat a thinking request that arrives mid-tool-use-loop.

    Nothing is changed unless ``self._is_claude(model)`` is truthy, a
    truthy ``reasoning_effort`` was requested, and
    ``_detect_incomplete_tool_turn`` reports an open tool turn. In that
    case one of three strategies is applied, in this order of precedence:

    1. ``self._auto_inject_turn_completion``: append a synthetic assistant
       message via ``_inject_turn_completion`` and keep the effort.
    2. ``self._auto_suppress_thinking``: keep the messages and return
       ``None`` as the reasoning effort.
    3. Neither enabled: log a warning and return the inputs unchanged.

    Args:
        messages: Original message list.
        model: Target model name (checked with ``self._is_claude``).
        reasoning_effort: Requested reasoning effort level, if any.

    Returns:
        ``(messages, reasoning_effort)``, possibly modified as described.
    """
    # Only relevant when thinking is being requested on a Claude model.
    wants_thinking = self._is_claude(model) and reasoning_effort
    if not wants_thinking:
        return messages, reasoning_effort

    open_turn_at = self._detect_incomplete_tool_turn(messages)
    if open_turn_at is None:
        # Conversation is not inside a tool loop; nothing to adjust.
        return messages, reasoning_effort

    # Default outcome (strategy 3): pass the request through untouched.
    outcome = (messages, reasoning_effort)

    if self._auto_inject_turn_completion:
        # Strategy 1 (preferred): close the turn so thinking can stay on.
        lib_logger.info(
            "Model switch to Claude with thinking detected mid-tool-use-loop. "
            "Injecting turn completion to enable thinking mode."
        )
        outcome = (
            self._inject_turn_completion(messages, open_turn_at),
            reasoning_effort,
        )
    elif self._auto_suppress_thinking:
        # Strategy 2: drop the reasoning effort for this request.
        lib_logger.warning(
            "Model switch to Claude with thinking detected mid-tool-use-loop. "
            f"Suppressing reasoning_effort={reasoning_effort} to avoid API error. "
            "Set ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION=true to inject completion instead."
        )
        outcome = (messages, None)
    else:
        # Strategy 3: the user opted out of both automatic remedies.
        lib_logger.warning(
            "Model switch to Claude with thinking detected mid-tool-use-loop. "
            "Both auto-injection and auto-suppression are disabled. "
            "Request will likely fail with API error. "
            "Enable ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION or ANTIGRAVITY_AUTO_SUPPRESS_THINKING."
        )

    return outcome
|
| 1467 |
+
|
| 1468 |
async def acompletion(
|
| 1469 |
self,
|
| 1470 |
client: httpx.AsyncClient,
|
|
|
|
| 1494 |
# Create logger
|
| 1495 |
file_logger = AntigravityFileLogger(model, enable_logging)
|
| 1496 |
|
| 1497 |
+
# Handle thinking mode toggling for model switches
|
| 1498 |
+
messages, reasoning_effort = self._handle_thinking_mode_toggle(messages, model, reasoning_effort)
|
| 1499 |
+
if reasoning_effort != kwargs.get("reasoning_effort"):
|
| 1500 |
+
kwargs["reasoning_effort"] = reasoning_effort
|
| 1501 |
+
if messages != kwargs.get("messages"):
|
| 1502 |
+
kwargs["messages"] = messages
|
| 1503 |
+
|
| 1504 |
# Transform messages
|
| 1505 |
system_instruction, gemini_contents = self._transform_messages(messages, model)
|
| 1506 |
gemini_contents = self._fix_tool_response_grouping(gemini_contents)
|