Spaces:
Paused
Paused
Commit ·
dbb1ef4
1
Parent(s): 81934c5
"fix_backend_retry_logic"
Browse files
- app/server/chat.py +20 -14
app/server/chat.py
CHANGED
|
@@ -786,8 +786,7 @@ async def _send_with_split(
|
|
| 786 |
content: str, file_list: list | None
|
| 787 |
) -> AsyncGenerator[ModelOutput, None]:
|
| 788 |
"""Manual retry logic for streaming."""
|
| 789 |
-
|
| 790 |
-
max_retries = 1
|
| 791 |
for attempt in range(max_retries + 1):
|
| 792 |
try:
|
| 793 |
gen = session.send_message_stream(content, files=file_list)
|
|
@@ -798,19 +797,18 @@ async def _send_with_split(
|
|
| 798 |
return
|
| 799 |
except Exception as e:
|
| 800 |
if has_yielded:
|
|
|
|
|
|
|
|
|
|
| 801 |
raise e
|
| 802 |
|
| 803 |
-
# Safety check: Do not retry if error indicates auth failure or rate limit
|
| 804 |
error_str = str(e).lower()
|
| 805 |
-
|
|
|
|
| 806 |
raise e
|
| 807 |
|
| 808 |
if attempt < max_retries:
|
| 809 |
-
|
| 810 |
-
base_delay = 3.0
|
| 811 |
-
jitter = random.uniform(0.5, 1.5)
|
| 812 |
-
delay = base_delay + jitter
|
| 813 |
-
|
| 814 |
logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries}). Retrying in {delay:.2f}s. Error: {e}")
|
| 815 |
await asyncio.sleep(delay)
|
| 816 |
else:
|
|
@@ -818,18 +816,26 @@ async def _send_with_split(
|
|
| 818 |
|
| 819 |
@retry_with_backoff(
|
| 820 |
max_retries=3,
|
| 821 |
-
base_delay=
|
| 822 |
max_delay=30.0,
|
| 823 |
exponential_base=2.0,
|
| 824 |
-
retryable_exceptions=(ConnectionError, TimeoutError, OSError),
|
| 825 |
)
|
| 826 |
async def _send_with_retry(
|
| 827 |
content: str, file_list: list | None, is_stream: bool
|
| 828 |
) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
|
| 829 |
"""Internal function with retry logic."""
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 833 |
|
| 834 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|
| 835 |
try:
|
|
|
|
| 786 |
content: str, file_list: list | None
|
| 787 |
) -> AsyncGenerator[ModelOutput, None]:
|
| 788 |
"""Manual retry logic for streaming."""
|
| 789 |
+
max_retries = 2 # Increased from 1
|
|
|
|
| 790 |
for attempt in range(max_retries + 1):
|
| 791 |
try:
|
| 792 |
gen = session.send_message_stream(content, files=file_list)
|
|
|
|
| 797 |
return
|
| 798 |
except Exception as e:
|
| 799 |
if has_yielded:
|
| 800 |
+
# If we already started yielding data, we can't easily retry the stream
|
| 801 |
+
# without sending duplicate content to the client.
|
| 802 |
+
logger.error(f"Stream interrupted after yielding data: {e}")
|
| 803 |
raise e
|
| 804 |
|
|
|
|
| 805 |
error_str = str(e).lower()
|
| 806 |
+
# Safety check: Do not retry if error indicates auth failure or rate limit
|
| 807 |
+
if any(code in error_str for code in ["429", "403", "401", "quota"]):
|
| 808 |
raise e
|
| 809 |
|
| 810 |
if attempt < max_retries:
|
| 811 |
+
delay = (attempt + 1) * 3.0 + random.uniform(0.5, 1.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 812 |
logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries}). Retrying in {delay:.2f}s. Error: {e}")
|
| 813 |
await asyncio.sleep(delay)
|
| 814 |
else:
|
|
|
|
| 816 |
|
| 817 |
@retry_with_backoff(
|
| 818 |
max_retries=3,
|
| 819 |
+
base_delay=2.0,
|
| 820 |
max_delay=30.0,
|
| 821 |
exponential_base=2.0,
|
| 822 |
+
retryable_exceptions=(ConnectionError, TimeoutError, OSError, Exception), # Catch all for APIError
|
| 823 |
)
|
| 824 |
async def _send_with_retry(
|
| 825 |
content: str, file_list: list | None, is_stream: bool
|
| 826 |
) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
|
| 827 |
"""Internal function with retry logic."""
|
| 828 |
+
try:
|
| 829 |
+
if is_stream:
|
| 830 |
+
return _stream_with_retry(content, file_list)
|
| 831 |
+
return await session.send_message(content, files=file_list)
|
| 832 |
+
except Exception as e:
|
| 833 |
+
# If it's a known "Stream interrupted" error, we want to retry
|
| 834 |
+
error_msg = str(e)
|
| 835 |
+
if "Stream interrupted" in error_msg or "truncated" in error_msg:
|
| 836 |
+
logger.warning(f"Gemini stream interrupted, triggering retry: {e}")
|
| 837 |
+
raise e # Let decorator handle retry
|
| 838 |
+
raise e
|
| 839 |
|
| 840 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|
| 841 |
try:
|