Mirrowel committed on
Commit
74f9532
·
1 Parent(s): 868b7c9

feat(antigravity): ✨ add thinking mode toggling for mid-conversation model switches

Browse files

This commit introduces intelligent handling of Claude's thinking mode when switching models mid-conversation during incomplete tool use loops.

**New Features:**
- Auto-detection of incomplete tool turns (when messages end with tool results without assistant completion)
- Configurable turn completion injection via `ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION` (default: true)
- Configurable thinking mode suppression via `ANTIGRAVITY_AUTO_SUPPRESS_THINKING` (default: false)
- Customizable turn completion placeholder text via `ANTIGRAVITY_TURN_COMPLETION_TEXT` (default: "...")

**Implementation Details:**
- `_detect_incomplete_tool_turn()`: Analyzes message history to identify incomplete tool use patterns
- `_inject_turn_completion()`: Appends a synthetic assistant message to close incomplete turns
- `_handle_thinking_mode_toggle()`: Orchestrates the toggling strategy based on configuration

**Behavior:**
When switching to Claude with thinking mode enabled during an incomplete tool loop:
1. If auto-injection is enabled: Inject a completion message to allow thinking mode
2. Otherwise, if auto-suppression is enabled: Disable thinking mode to prevent API errors
3. If both are disabled: Allow the request to proceed (likely resulting in an API error)

This resolves API compatibility issues when transitioning between models with different conversation state requirements.

src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -331,6 +331,11 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
331
  self._enable_dynamic_models = _env_bool("ANTIGRAVITY_ENABLE_DYNAMIC_MODELS", False)
332
  self._enable_gemini3_tool_fix = _env_bool("ANTIGRAVITY_GEMINI3_TOOL_FIX", True)
333
 
 
 
 
 
 
334
  # Gemini 3 tool fix configuration
335
  self._gemini3_tool_prefix = os.getenv("ANTIGRAVITY_GEMINI3_TOOL_PREFIX", "gemini3_")
336
  self._gemini3_description_prompt = os.getenv(
@@ -1324,6 +1329,142 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1324
 
1325
  return [f"antigravity/{m}" for m in AVAILABLE_MODELS]
1326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1327
  async def acompletion(
1328
  self,
1329
  client: httpx.AsyncClient,
@@ -1353,6 +1494,13 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1353
  # Create logger
1354
  file_logger = AntigravityFileLogger(model, enable_logging)
1355
 
 
 
 
 
 
 
 
1356
  # Transform messages
1357
  system_instruction, gemini_contents = self._transform_messages(messages, model)
1358
  gemini_contents = self._fix_tool_response_grouping(gemini_contents)
 
331
  self._enable_dynamic_models = _env_bool("ANTIGRAVITY_ENABLE_DYNAMIC_MODELS", False)
332
  self._enable_gemini3_tool_fix = _env_bool("ANTIGRAVITY_GEMINI3_TOOL_FIX", True)
333
 
334
+ # Thinking mode toggling behavior
335
+ self._auto_inject_turn_completion = _env_bool("ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION", True)
336
+ self._auto_suppress_thinking = _env_bool("ANTIGRAVITY_AUTO_SUPPRESS_THINKING", False)
337
+ self._turn_completion_placeholder = os.getenv("ANTIGRAVITY_TURN_COMPLETION_TEXT", "...")
338
+
339
  # Gemini 3 tool fix configuration
340
  self._gemini3_tool_prefix = os.getenv("ANTIGRAVITY_GEMINI3_TOOL_PREFIX", "gemini3_")
341
  self._gemini3_description_prompt = os.getenv(
 
1329
 
1330
  return [f"antigravity/{m}" for m in AVAILABLE_MODELS]
1331
 
1332
+ # =========================================================================
1333
+ # THINKING MODE TOGGLING HELPERS
1334
+ # =========================================================================
1335
+
1336
def _detect_incomplete_tool_turn(self, messages: List[Dict[str, Any]]) -> Optional[int]:
    """
    Detect if messages end with an incomplete tool use loop.

    An incomplete tool turn is when:
    - Last message is a tool result
    - The nearest preceding assistant message carries tool_calls, i.e. the
      assistant has not yet produced a final text response for that turn

    Args:
        messages: Conversation history as a list of role-tagged message dicts

    Returns:
        Index of the assistant message with tool_calls if incomplete turn detected,
        None otherwise
    """
    if len(messages) < 2:
        return None

    # Last message must be tool result
    if messages[-1].get("role") != "tool":
        return None

    # Walk backwards from the message just before the trailing tool result and
    # stop at the first assistant message. Because the scan is backwards, no
    # assistant message can exist between that index and the end of the list,
    # so no forward re-scan for a "completing" assistant message is needed.
    for i in range(len(messages) - 2, -1, -1):
        msg = messages[i]
        if msg.get("role") != "assistant":
            continue

        if not msg.get("tool_calls"):
            # Nearest assistant message is a plain response: turn is complete
            return None

        # Incomplete turn found
        lib_logger.debug(
            f"Detected incomplete tool turn: assistant message at index {i} "
            "has tool_calls, but no completing text response found"
        )
        return i

    # No assistant message precedes the tool result
    return None
1377
+
1378
def _inject_turn_completion(
    self,
    messages: List[Dict[str, Any]],
    incomplete_turn_index: int
) -> List[Dict[str, Any]]:
    """
    Close an incomplete tool use turn by appending a synthetic assistant message.

    The input list is not mutated; a shallow copy is extended and returned.

    Args:
        messages: Original message list
        incomplete_turn_index: Index of the assistant message with tool_calls
            (used only for the log message)

    Returns:
        New message list ending with the injected assistant completion
    """
    placeholder = self._turn_completion_placeholder

    # Work on a shallow copy so the caller's list keeps its original length
    closed_turn = messages.copy()
    closed_turn.append({"role": "assistant", "content": placeholder})

    lib_logger.info(
        f"Injected turn-completing assistant message ('{placeholder}') "
        f"to enable thinking mode. Original tool use started at message index {incomplete_turn_index}."
    )

    return closed_turn
1408
+
1409
def _handle_thinking_mode_toggle(
    self,
    messages: List[Dict[str, Any]],
    model: str,
    reasoning_effort: Optional[str]
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
    """
    Handle thinking mode toggling when switching models mid-conversation.

    When switching to Claude with thinking enabled, but the conversation has
    an incomplete tool use loop from another model, apply the first strategy
    that is enabled, in this order:
    1. Inject a completing message to close the turn (if auto_inject enabled)
    2. Suppress thinking mode (if auto_suppress enabled)
    3. Let it fail with API error (if both disabled)

    Injection takes precedence: when both flags are enabled, thinking is
    never suppressed.

    Args:
        messages: Original message list
        model: Target model
        reasoning_effort: Requested reasoning effort level; a falsy value
            means thinking was not requested and nothing is changed

    Returns:
        (modified_messages, modified_reasoning_effort)
    """
    # Only applies when trying to enable thinking on Claude
    if not self._is_claude(model) or not reasoning_effort:
        return messages, reasoning_effort

    incomplete_turn_index = self._detect_incomplete_tool_turn(messages)
    if incomplete_turn_index is None:
        # No incomplete turn - proceed normally
        return messages, reasoning_effort

    # Strategy 1: Auto-inject turn completion (preferred)
    if self._auto_inject_turn_completion:
        lib_logger.info(
            "Model switch to Claude with thinking detected mid-tool-use-loop. "
            "Injecting turn completion to enable thinking mode."
        )
        modified_messages = self._inject_turn_completion(messages, incomplete_turn_index)
        return modified_messages, reasoning_effort

    # Strategy 2: Auto-suppress thinking
    if self._auto_suppress_thinking:
        lib_logger.warning(
            "Model switch to Claude with thinking detected mid-tool-use-loop. "
            f"Suppressing reasoning_effort={reasoning_effort} to avoid API error. "
            "Set ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION=true to inject completion instead."
        )
        # Messages are left untouched; only the thinking request is dropped
        return messages, None

    # Strategy 3: Let it fail (user wants to handle it themselves)
    lib_logger.warning(
        "Model switch to Claude with thinking detected mid-tool-use-loop. "
        "Both auto-injection and auto-suppression are disabled. "
        "Request will likely fail with API error. "
        "Enable ANTIGRAVITY_AUTO_INJECT_TURN_COMPLETION or ANTIGRAVITY_AUTO_SUPPRESS_THINKING."
    )
    return messages, reasoning_effort
1467
+
1468
  async def acompletion(
1469
  self,
1470
  client: httpx.AsyncClient,
 
1494
  # Create logger
1495
  file_logger = AntigravityFileLogger(model, enable_logging)
1496
 
1497
+ # Handle thinking mode toggling for model switches
1498
+ messages, reasoning_effort = self._handle_thinking_mode_toggle(messages, model, reasoning_effort)
1499
+ if reasoning_effort != kwargs.get("reasoning_effort"):
1500
+ kwargs["reasoning_effort"] = reasoning_effort
1501
+ if messages != kwargs.get("messages"):
1502
+ kwargs["messages"] = messages
1503
+
1504
  # Transform messages
1505
  system_instruction, gemini_contents = self._transform_messages(messages, model)
1506
  gemini_contents = self._fix_tool_response_grouping(gemini_contents)