Spaces:
Paused
Paused
Commit · 25bce68
1 Parent(s): dbb1ef4
"fix_retry_timeout_for_cloudflare_524"
Browse files
- app/server/chat.py +8 -11
app/server/chat.py
CHANGED
|
@@ -786,7 +786,7 @@ async def _send_with_split(
|
|
| 786 |
content: str, file_list: list | None
|
| 787 |
) -> AsyncGenerator[ModelOutput, None]:
|
| 788 |
"""Manual retry logic for streaming."""
|
| 789 |
-
max_retries =
|
| 790 |
for attempt in range(max_retries + 1):
|
| 791 |
try:
|
| 792 |
gen = session.send_message_stream(content, files=file_list)
|
|
@@ -797,29 +797,26 @@ async def _send_with_split(
|
|
| 797 |
return
|
| 798 |
except Exception as e:
|
| 799 |
if has_yielded:
|
| 800 |
-
# If we already started yielding data, we can't easily retry the stream
|
| 801 |
-
# without sending duplicate content to the client.
|
| 802 |
logger.error(f"Stream interrupted after yielding data: {e}")
|
| 803 |
raise e
|
| 804 |
|
| 805 |
error_str = str(e).lower()
|
| 806 |
-
# Safety check: Do not retry if error indicates auth failure or rate limit
|
| 807 |
if any(code in error_str for code in ["429", "403", "401", "quota"]):
|
| 808 |
raise e
|
| 809 |
|
| 810 |
if attempt < max_retries:
|
| 811 |
-
delay =
|
| 812 |
logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries}). Retrying in {delay:.2f}s. Error: {e}")
|
| 813 |
await asyncio.sleep(delay)
|
| 814 |
else:
|
| 815 |
raise e
|
| 816 |
|
| 817 |
@retry_with_backoff(
|
| 818 |
-
max_retries=
|
| 819 |
base_delay=2.0,
|
| 820 |
-
max_delay=
|
| 821 |
exponential_base=2.0,
|
| 822 |
-
retryable_exceptions=(ConnectionError, TimeoutError, OSError, Exception),
|
| 823 |
)
|
| 824 |
async def _send_with_retry(
|
| 825 |
content: str, file_list: list | None, is_stream: bool
|
|
@@ -830,11 +827,11 @@ async def _send_with_split(
|
|
| 830 |
return _stream_with_retry(content, file_list)
|
| 831 |
return await session.send_message(content, files=file_list)
|
| 832 |
except Exception as e:
|
| 833 |
-
# If it's a known "Stream interrupted" error, we want to retry
|
| 834 |
error_msg = str(e)
|
|
|
|
| 835 |
if "Stream interrupted" in error_msg or "truncated" in error_msg:
|
| 836 |
-
logger.warning(f"Gemini stream interrupted
|
| 837 |
-
raise e
|
| 838 |
raise e
|
| 839 |
|
| 840 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|
|
|
|
| 786 |
content: str, file_list: list | None
|
| 787 |
) -> AsyncGenerator[ModelOutput, None]:
|
| 788 |
"""Manual retry logic for streaming."""
|
| 789 |
+
max_retries = 1 # 再次减少到 1 次,确保总时长可控
|
| 790 |
for attempt in range(max_retries + 1):
|
| 791 |
try:
|
| 792 |
gen = session.send_message_stream(content, files=file_list)
|
|
|
|
| 797 |
return
|
| 798 |
except Exception as e:
|
| 799 |
if has_yielded:
|
|
|
|
|
|
|
| 800 |
logger.error(f"Stream interrupted after yielding data: {e}")
|
| 801 |
raise e
|
| 802 |
|
| 803 |
error_str = str(e).lower()
|
|
|
|
| 804 |
if any(code in error_str for code in ["429", "403", "401", "quota"]):
|
| 805 |
raise e
|
| 806 |
|
| 807 |
if attempt < max_retries:
|
| 808 |
+
delay = 2.0 + random.uniform(0.1, 0.5) # 缩短延迟
|
| 809 |
logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries}). Retrying in {delay:.2f}s. Error: {e}")
|
| 810 |
await asyncio.sleep(delay)
|
| 811 |
else:
|
| 812 |
raise e
|
| 813 |
|
| 814 |
@retry_with_backoff(
|
| 815 |
+
max_retries=1, # 减少重试次数,避免触发 Cloudflare 524 超时
|
| 816 |
base_delay=2.0,
|
| 817 |
+
max_delay=10.0,
|
| 818 |
exponential_base=2.0,
|
| 819 |
+
retryable_exceptions=(ConnectionError, TimeoutError, OSError, Exception),
|
| 820 |
)
|
| 821 |
async def _send_with_retry(
|
| 822 |
content: str, file_list: list | None, is_stream: bool
|
|
|
|
| 827 |
return _stream_with_retry(content, file_list)
|
| 828 |
return await session.send_message(content, files=file_list)
|
| 829 |
except Exception as e:
|
|
|
|
| 830 |
error_msg = str(e)
|
| 831 |
+
# 如果是流中断,记录警告并抛出以触发重试
|
| 832 |
if "Stream interrupted" in error_msg or "truncated" in error_msg:
|
| 833 |
+
logger.warning(f"Gemini stream interrupted (Session: {session.sid}): {e}")
|
| 834 |
+
raise e
|
| 835 |
raise e
|
| 836 |
|
| 837 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|