superxu520 committed on
Commit
dbb1ef4
·
1 Parent(s): 81934c5

"fix_backend_retry_logic"

Browse files
Files changed (1) hide show
  1. app/server/chat.py +20 -14
app/server/chat.py CHANGED
@@ -786,8 +786,7 @@ async def _send_with_split(
786
  content: str, file_list: list | None
787
  ) -> AsyncGenerator[ModelOutput, None]:
788
  """Manual retry logic for streaming."""
789
- # Reduced retries to 1 to minimize ban risk
790
- max_retries = 1
791
  for attempt in range(max_retries + 1):
792
  try:
793
  gen = session.send_message_stream(content, files=file_list)
@@ -798,19 +797,18 @@ async def _send_with_split(
798
  return
799
  except Exception as e:
800
  if has_yielded:
 
 
 
801
  raise e
802
 
803
- # Safety check: Do not retry if error indicates auth failure or rate limit
804
  error_str = str(e).lower()
805
- if "429" in error_str or "403" in error_str or "quota" in error_str:
 
806
  raise e
807
 
808
  if attempt < max_retries:
809
- # Increased delay with randomness to avoid pattern detection
810
- base_delay = 3.0
811
- jitter = random.uniform(0.5, 1.5)
812
- delay = base_delay + jitter
813
-
814
  logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries}). Retrying in {delay:.2f}s. Error: {e}")
815
  await asyncio.sleep(delay)
816
  else:
@@ -818,18 +816,26 @@ async def _send_with_split(
818
 
819
  @retry_with_backoff(
820
  max_retries=3,
821
- base_delay=1.0,
822
  max_delay=30.0,
823
  exponential_base=2.0,
824
- retryable_exceptions=(ConnectionError, TimeoutError, OSError),
825
  )
826
  async def _send_with_retry(
827
  content: str, file_list: list | None, is_stream: bool
828
  ) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
829
  """Internal function with retry logic."""
830
- if is_stream:
831
- return _stream_with_retry(content, file_list)
832
- return await session.send_message(content, files=file_list)
 
 
 
 
 
 
 
 
833
 
834
  if len(text) <= MAX_CHARS_PER_REQUEST:
835
  try:
 
786
  content: str, file_list: list | None
787
  ) -> AsyncGenerator[ModelOutput, None]:
788
  """Manual retry logic for streaming."""
789
+ max_retries = 2 # Increased from 1
 
790
  for attempt in range(max_retries + 1):
791
  try:
792
  gen = session.send_message_stream(content, files=file_list)
 
797
  return
798
  except Exception as e:
799
  if has_yielded:
800
+ # If we already started yielding data, we can't easily retry the stream
801
+ # without sending duplicate content to the client.
802
+ logger.error(f"Stream interrupted after yielding data: {e}")
803
  raise e
804
 
 
805
  error_str = str(e).lower()
806
+ # Safety check: Do not retry if error indicates auth failure or rate limit
807
+ if any(code in error_str for code in ["429", "403", "401", "quota"]):
808
  raise e
809
 
810
  if attempt < max_retries:
811
+ delay = (attempt + 1) * 3.0 + random.uniform(0.5, 1.5)
 
 
 
 
812
  logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries}). Retrying in {delay:.2f}s. Error: {e}")
813
  await asyncio.sleep(delay)
814
  else:
 
816
 
817
  @retry_with_backoff(
818
  max_retries=3,
819
+ base_delay=2.0,
820
  max_delay=30.0,
821
  exponential_base=2.0,
822
+ retryable_exceptions=(ConnectionError, TimeoutError, OSError, Exception), # Catch all for APIError
823
  )
824
  async def _send_with_retry(
825
  content: str, file_list: list | None, is_stream: bool
826
  ) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
827
  """Internal function with retry logic."""
828
+ try:
829
+ if is_stream:
830
+ return _stream_with_retry(content, file_list)
831
+ return await session.send_message(content, files=file_list)
832
+ except Exception as e:
833
+ # If it's a known "Stream interrupted" error, we want to retry
834
+ error_msg = str(e)
835
+ if "Stream interrupted" in error_msg or "truncated" in error_msg:
836
+ logger.warning(f"Gemini stream interrupted, triggering retry: {e}")
837
+ raise e # Let decorator handle retry
838
+ raise e
839
 
840
  if len(text) <= MAX_CHARS_PER_REQUEST:
841
  try: