Spaces:
Paused
Paused
Mirrowel
committed on
Commit
·
9c5dbdf
1
Parent(s):
a9943e6
fix(antigravity): propagate usage stats in streaming auto-recovery
Browse files

The streaming wrapper relies on `completion_tokens > 0` in the usage dictionary to correctly detect final chunks and terminate the stream. Previously, auto-recovery responses lacked this data, causing streams to hang.
- Add `_build_malformed_fallback_chunk` to generate compliant streaming error responses when retries are exhausted.
- Extract and propagate `last_usage` from the response accumulator during malformed function call exceptions.
- Ensure fixed tool call chunks and fallback chunks include valid usage data (forcing `completion_tokens` to 1 if necessary) to properly signal stream completion.
src/rotator_library/providers/antigravity_provider.py
CHANGED
|
@@ -3153,6 +3153,55 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 3153 |
}
|
| 3154 |
)
|
| 3155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3156 |
def _build_fixed_tool_call_response(
|
| 3157 |
self,
|
| 3158 |
model: str,
|
|
@@ -3216,6 +3265,7 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 3216 |
parsed_call: Dict[str, Any],
|
| 3217 |
error_info: Dict[str, Any],
|
| 3218 |
response_id: Optional[str] = None,
|
|
|
|
| 3219 |
) -> Optional[litellm.ModelResponse]:
|
| 3220 |
"""
|
| 3221 |
Build a streaming chunk with the auto-fixed tool call.
|
|
@@ -3227,6 +3277,8 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 3227 |
|
| 3228 |
Args:
|
| 3229 |
response_id: Optional original response ID to maintain stream continuity
|
|
|
|
|
|
|
| 3230 |
|
| 3231 |
Returns None if the JSON couldn't be fixed.
|
| 3232 |
"""
|
|
@@ -3245,6 +3297,16 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 3245 |
# Use original response ID if provided, otherwise generate new one
|
| 3246 |
chunk_id = response_id or f"chatcmpl-{uuid.uuid4().hex[:24]}"
|
| 3247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3248 |
return litellm.ModelResponse(
|
| 3249 |
**{
|
| 3250 |
"id": chunk_id,
|
|
@@ -3272,6 +3334,7 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 3272 |
"finish_reason": "tool_calls",
|
| 3273 |
}
|
| 3274 |
],
|
|
|
|
| 3275 |
}
|
| 3276 |
)
|
| 3277 |
|
|
@@ -4641,6 +4704,14 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 4641 |
# Handle MALFORMED_FUNCTION_CALL - try auto-fix first
|
| 4642 |
parsed = self._parse_malformed_call_message(e.finish_message, model)
|
| 4643 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4644 |
if parsed:
|
| 4645 |
# Try to auto-fix the malformed JSON
|
| 4646 |
error_info = self._analyze_json_error(parsed["raw_args"])
|
|
@@ -4660,15 +4731,13 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 4660 |
error_info["fixed_json"],
|
| 4661 |
)
|
| 4662 |
|
| 4663 |
-
#
|
| 4664 |
-
response_id = None
|
| 4665 |
-
if e.raw_response and isinstance(e.raw_response, dict):
|
| 4666 |
-
acc = e.raw_response.get("accumulator", {})
|
| 4667 |
-
response_id = acc.get("response_id")
|
| 4668 |
-
|
| 4669 |
-
# Use chunk format for streaming with original response ID
|
| 4670 |
fixed_chunk = self._build_fixed_tool_call_chunk(
|
| 4671 |
-
model,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4672 |
)
|
| 4673 |
if fixed_chunk:
|
| 4674 |
yield fixed_chunk
|
|
@@ -4730,8 +4799,11 @@ Analyze what you did wrong, correct it, and retry the function call. Output ONLY
|
|
| 4730 |
f"[Antigravity] MALFORMED_FUNCTION_CALL could not be auto-fixed "
|
| 4731 |
f"for {model} (streaming): {e.finish_message[:100]}..."
|
| 4732 |
)
|
| 4733 |
-
fallback = self.
|
| 4734 |
-
model,
|
|
|
|
|
|
|
|
|
|
| 4735 |
)
|
| 4736 |
yield fallback
|
| 4737 |
return
|
|
|
|
| 3153 |
}
|
| 3154 |
)
|
| 3155 |
|
| 3156 |
+
def _build_malformed_fallback_chunk(
|
| 3157 |
+
self,
|
| 3158 |
+
model: str,
|
| 3159 |
+
error_details: str,
|
| 3160 |
+
response_id: Optional[str] = None,
|
| 3161 |
+
usage: Optional[Dict[str, Any]] = None,
|
| 3162 |
+
) -> litellm.ModelResponse:
|
| 3163 |
+
"""
|
| 3164 |
+
Build streaming chunk error response when malformed call retries are exhausted.
|
| 3165 |
+
|
| 3166 |
+
Uses streaming format (delta instead of message) for consistency with streaming responses.
|
| 3167 |
+
Includes usage with completion_tokens > 0 so client.py recognizes it as a final chunk.
|
| 3168 |
+
"""
|
| 3169 |
+
chunk_id = response_id or f"chatcmpl-{uuid.uuid4().hex[:24]}"
|
| 3170 |
+
|
| 3171 |
+
# Ensure usage has completion_tokens > 0 for client to recognize as final chunk
|
| 3172 |
+
if not usage or usage.get("completion_tokens", 0) <= 0:
|
| 3173 |
+
prompt_tokens = usage.get("prompt_tokens", 0) if usage else 0
|
| 3174 |
+
usage = {
|
| 3175 |
+
"prompt_tokens": prompt_tokens,
|
| 3176 |
+
"completion_tokens": 1,
|
| 3177 |
+
"total_tokens": prompt_tokens + 1,
|
| 3178 |
+
}
|
| 3179 |
+
|
| 3180 |
+
return litellm.ModelResponse(
|
| 3181 |
+
**{
|
| 3182 |
+
"id": chunk_id,
|
| 3183 |
+
"object": "chat.completion.chunk",
|
| 3184 |
+
"created": int(time.time()),
|
| 3185 |
+
"model": model,
|
| 3186 |
+
"choices": [
|
| 3187 |
+
{
|
| 3188 |
+
"index": 0,
|
| 3189 |
+
"delta": {
|
| 3190 |
+
"role": "assistant",
|
| 3191 |
+
"content": (
|
| 3192 |
+
"[TOOL CALL ERROR] I attempted to call a function but "
|
| 3193 |
+
"repeatedly produced malformed syntax. This may be a model issue.\n\n"
|
| 3194 |
+
f"Last error: {error_details}\n\n"
|
| 3195 |
+
"Please try rephrasing your request or try a different approach."
|
| 3196 |
+
),
|
| 3197 |
+
},
|
| 3198 |
+
"finish_reason": "stop",
|
| 3199 |
+
}
|
| 3200 |
+
],
|
| 3201 |
+
"usage": usage,
|
| 3202 |
+
}
|
| 3203 |
+
)
|
| 3204 |
+
|
| 3205 |
def _build_fixed_tool_call_response(
|
| 3206 |
self,
|
| 3207 |
model: str,
|
|
|
|
| 3265 |
parsed_call: Dict[str, Any],
|
| 3266 |
error_info: Dict[str, Any],
|
| 3267 |
response_id: Optional[str] = None,
|
| 3268 |
+
usage: Optional[Dict[str, Any]] = None,
|
| 3269 |
) -> Optional[litellm.ModelResponse]:
|
| 3270 |
"""
|
| 3271 |
Build a streaming chunk with the auto-fixed tool call.
|
|
|
|
| 3277 |
|
| 3278 |
Args:
|
| 3279 |
response_id: Optional original response ID to maintain stream continuity
|
| 3280 |
+
usage: Optional usage from previous chunks. Must include completion_tokens > 0
|
| 3281 |
+
for client to recognize this as a final chunk.
|
| 3282 |
|
| 3283 |
Returns None if the JSON couldn't be fixed.
|
| 3284 |
"""
|
|
|
|
| 3297 |
# Use original response ID if provided, otherwise generate new one
|
| 3298 |
chunk_id = response_id or f"chatcmpl-{uuid.uuid4().hex[:24]}"
|
| 3299 |
|
| 3300 |
+
# Ensure usage has completion_tokens > 0 for client to recognize as final chunk
|
| 3301 |
+
# Client.py's _safe_streaming_wrapper uses completion_tokens > 0 to detect final chunks
|
| 3302 |
+
if not usage or usage.get("completion_tokens", 0) <= 0:
|
| 3303 |
+
prompt_tokens = usage.get("prompt_tokens", 0) if usage else 0
|
| 3304 |
+
usage = {
|
| 3305 |
+
"prompt_tokens": prompt_tokens,
|
| 3306 |
+
"completion_tokens": 1, # Minimum to signal final chunk
|
| 3307 |
+
"total_tokens": prompt_tokens + 1,
|
| 3308 |
+
}
|
| 3309 |
+
|
| 3310 |
return litellm.ModelResponse(
|
| 3311 |
**{
|
| 3312 |
"id": chunk_id,
|
|
|
|
| 3334 |
"finish_reason": "tool_calls",
|
| 3335 |
}
|
| 3336 |
],
|
| 3337 |
+
"usage": usage,
|
| 3338 |
}
|
| 3339 |
)
|
| 3340 |
|
|
|
|
| 4704 |
# Handle MALFORMED_FUNCTION_CALL - try auto-fix first
|
| 4705 |
parsed = self._parse_malformed_call_message(e.finish_message, model)
|
| 4706 |
|
| 4707 |
+
# Extract response_id and last_usage from accumulator for all paths
|
| 4708 |
+
response_id = None
|
| 4709 |
+
last_usage = None
|
| 4710 |
+
if e.raw_response and isinstance(e.raw_response, dict):
|
| 4711 |
+
acc = e.raw_response.get("accumulator", {})
|
| 4712 |
+
response_id = acc.get("response_id")
|
| 4713 |
+
last_usage = acc.get("last_usage")
|
| 4714 |
+
|
| 4715 |
if parsed:
|
| 4716 |
# Try to auto-fix the malformed JSON
|
| 4717 |
error_info = self._analyze_json_error(parsed["raw_args"])
|
|
|
|
| 4731 |
error_info["fixed_json"],
|
| 4732 |
)
|
| 4733 |
|
| 4734 |
+
# Use chunk format for streaming with original response ID and usage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4735 |
fixed_chunk = self._build_fixed_tool_call_chunk(
|
| 4736 |
+
model,
|
| 4737 |
+
parsed,
|
| 4738 |
+
error_info,
|
| 4739 |
+
response_id=response_id,
|
| 4740 |
+
usage=last_usage,
|
| 4741 |
)
|
| 4742 |
if fixed_chunk:
|
| 4743 |
yield fixed_chunk
|
|
|
|
| 4799 |
f"[Antigravity] MALFORMED_FUNCTION_CALL could not be auto-fixed "
|
| 4800 |
f"for {model} (streaming): {e.finish_message[:100]}..."
|
| 4801 |
)
|
| 4802 |
+
fallback = self._build_malformed_fallback_chunk(
|
| 4803 |
+
model,
|
| 4804 |
+
e.finish_message,
|
| 4805 |
+
response_id=response_id,
|
| 4806 |
+
usage=last_usage,
|
| 4807 |
)
|
| 4808 |
yield fallback
|
| 4809 |
return
|