superxu520 commited on
Commit
b2c2a89
·
1 Parent(s): 917fdb0

update: 同步最新代码与配置

Browse files
Files changed (5) hide show
  1. app/main.py +1 -2
  2. app/server/chat.py +90 -9
  3. app/services/client.py +16 -8
  4. config/config.yaml +4 -4
  5. run.py +4 -7
app/main.py CHANGED
@@ -2,7 +2,6 @@ import asyncio
2
  from contextlib import asynccontextmanager
3
 
4
  from fastapi import FastAPI
5
- from fastapi.responses import ORJSONResponse
6
  from loguru import logger
7
 
8
  from .server.chat import router as chat_router
@@ -93,7 +92,7 @@ def create_app() -> FastAPI:
93
  description="OpenAI-compatible API for Gemini Web",
94
  version="1.0.0",
95
  lifespan=lifespan,
96
- default_response_class=ORJSONResponse,
97
  )
98
 
99
  add_cors_middleware(app)
 
2
  from contextlib import asynccontextmanager
3
 
4
  from fastapi import FastAPI
 
5
  from loguru import logger
6
 
7
  from .server.chat import router as chat_router
 
92
  description="OpenAI-compatible API for Gemini Web",
93
  version="1.0.0",
94
  lifespan=lifespan,
95
+
96
  )
97
 
98
  add_cors_middleware(app)
app/server/chat.py CHANGED
@@ -1,6 +1,8 @@
 
1
  import base64
2
  import hashlib
3
  import io
 
4
  import re
5
  import reprlib
6
  import uuid
@@ -258,7 +260,7 @@ def _process_llm_output(
258
  # Let's import the cleaning functions? They are internal to client.py.
259
  # Better: Use GeminiClientWrapper.extract_output with a dummy ModelOutput.
260
 
261
- dummy_output = ModelOutput(text=visible_output, thoughts=None, images=[], metadata=[], candidates=[])
262
  # We don't want to re-add thoughts (include_thoughts=False) because we handle them separately?
263
  # extract_output puts thoughts in <think> tags if include_thoughts=True.
264
  # Here `visible_output` DOES NOT have thoughts (they are in `raw_output_with_think`'s <think> part).
@@ -280,7 +282,7 @@ def _process_llm_output(
280
  # Apply to storage_output too?
281
  # storage_output usually should match visible_output for history consistency.
282
  storage_output = GeminiClientWrapper.extract_output(
283
- ModelOutput(text=storage_output, thoughts=None, images=[], metadata=[], candidates=[]),
284
  include_thoughts=False
285
  )
286
  except Exception as e:
@@ -780,6 +782,40 @@ async def _send_with_split(
780
  Includes retry with exponential backoff for transient failures.
781
  """
782
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  @retry_with_backoff(
784
  max_retries=3,
785
  base_delay=1.0,
@@ -792,7 +828,7 @@ async def _send_with_split(
792
  ) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
793
  """Internal function with retry logic."""
794
  if is_stream:
795
- return session.send_message_stream(content, files=file_list)
796
  return await session.send_message(content, files=file_list)
797
 
798
  if len(text) <= MAX_CHARS_PER_REQUEST:
@@ -1554,6 +1590,17 @@ def _create_responses_real_streaming_response(
1554
  if img.url not in seen_urls:
1555
  images.append(img)
1556
  seen_urls.add(img.url)
 
 
 
 
 
 
 
 
 
 
 
1557
 
1558
  response_contents, image_call_items = [], []
1559
  seen_hashes = set()
@@ -2000,6 +2047,22 @@ async def create_chat_completion(
2000
 
2001
  # Process images for OpenAI non-streaming flow
2002
  images = resp_or_stream.images or []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2003
  image_markdown = ""
2004
  seen_hashes = set()
2005
  for image in images:
@@ -2299,8 +2362,8 @@ async def create_response(
2299
  remain = [messages[-1]]
2300
  msgs = _prepare_messages_for_model(
2301
  remain,
2302
- request.tools,
2303
- request.tool_choice,
2304
  None,
2305
  False,
2306
  )
@@ -2366,10 +2429,28 @@ async def create_response(
2366
 
2367
  assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req)
2368
  images = resp_or_stream.images or []
2369
- if (
2370
- request.tool_choice is not None and request.tool_choice.type == "image_generation"
2371
- ) and not images:
2372
- raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2373
 
2374
  contents, img_calls = [], []
2375
  seen_hashes = set()
 
1
+ import asyncio
2
  import base64
3
  import hashlib
4
  import io
5
+ import random
6
  import re
7
  import reprlib
8
  import uuid
 
260
  # Let's import the cleaning functions? They are internal to client.py.
261
  # Better: Use GeminiClientWrapper.extract_output with a dummy ModelOutput.
262
 
263
+ dummy_output = ModelOutput(metadata=[], candidates=[{"rcid": "dummy", "text": visible_output}])
264
  # We don't want to re-add thoughts (include_thoughts=False) because we handle them separately?
265
  # extract_output puts thoughts in <think> tags if include_thoughts=True.
266
  # Here `visible_output` DOES NOT have thoughts (they are in `raw_output_with_think`'s <think> part).
 
282
  # Apply to storage_output too?
283
  # storage_output usually should match visible_output for history consistency.
284
  storage_output = GeminiClientWrapper.extract_output(
285
+ ModelOutput(metadata=[], candidates=[{"rcid": "dummy", "text": storage_output}]),
286
  include_thoughts=False
287
  )
288
  except Exception as e:
 
782
  Includes retry with exponential backoff for transient failures.
783
  """
784
 
785
+ async def _stream_with_retry(
786
+ content: str, file_list: list | None
787
+ ) -> AsyncGenerator[ModelOutput, None]:
788
+ """Manual retry logic for streaming."""
789
+ # Reduced retries to 1 to minimize ban risk
790
+ max_retries = 1
791
+ for attempt in range(max_retries + 1):
792
+ try:
793
+ gen = session.send_message_stream(content, files=file_list)
794
+ has_yielded = False
795
+ async for chunk in gen:
796
+ yield chunk
797
+ has_yielded = True
798
+ return
799
+ except Exception as e:
800
+ if has_yielded:
801
+ raise e
802
+
803
+ # Safety check: Do not retry if error indicates auth failure or rate limit
804
+ error_str = str(e).lower()
805
+ if "429" in error_str or "403" in error_str or "quota" in error_str:
806
+ raise e
807
+
808
+ if attempt < max_retries:
809
+ # Increased delay with randomness to avoid pattern detection
810
+ base_delay = 3.0
811
+ jitter = random.uniform(0.5, 1.5)
812
+ delay = base_delay + jitter
813
+
814
+ logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries}). Retrying in {delay:.2f}s. Error: {e}")
815
+ await asyncio.sleep(delay)
816
+ else:
817
+ raise e
818
+
819
  @retry_with_backoff(
820
  max_retries=3,
821
  base_delay=1.0,
 
828
  ) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
829
  """Internal function with retry logic."""
830
  if is_stream:
831
+ return _stream_with_retry(content, file_list)
832
  return await session.send_message(content, files=file_list)
833
 
834
  if len(text) <= MAX_CHARS_PER_REQUEST:
 
1590
  if img.url not in seen_urls:
1591
  images.append(img)
1592
  seen_urls.add(img.url)
1593
+
1594
+ # Check if image generation was forced via tool_choice (same logic as non-streaming)
1595
+ image_generation_forced = (
1596
+ request.tool_choice is not None
1597
+ and isinstance(request.tool_choice, ResponseToolChoice)
1598
+ and request.tool_choice.type == "image_generation"
1599
+ )
1600
+ if image_generation_forced and not images and not assistant_text:
1601
+ logger.warning("Image generation was forced via tool_choice but no images or text were returned in stream.")
1602
+ yield f"data: {orjson.dumps({**base_event, 'type': 'error', 'error': {'message': 'No images returned from forced image generation request.'}}).decode('utf-8')}\n\n"
1603
+ return
1604
 
1605
  response_contents, image_call_items = [], []
1606
  seen_hashes = set()
 
2047
 
2048
  # Process images for OpenAI non-streaming flow
2049
  images = resp_or_stream.images or []
2050
+
2051
+ # Log response details for debugging
2052
+ logger.debug(f"Chat response: text_len={len(visible_output)}, images={len(images)}, tool_calls={len(tool_calls)}")
2053
+ logger.debug(f"Raw response text (first 500 chars): {raw_with_t[:500] if raw_with_t else 'EMPTY'}")
2054
+
2055
+ # Check if response is completely empty
2056
+ if not visible_output and not images and not tool_calls:
2057
+ logger.warning("Gemini returned an empty response for chat completion (no text, images, or tool calls)")
2058
+ # Log more details about the response object
2059
+ logger.debug(f"Response object type: {type(resp_or_stream)}")
2060
+ logger.debug(f"Response has candidates: {hasattr(resp_or_stream, 'candidates')}")
2061
+ if hasattr(resp_or_stream, 'candidates'):
2062
+ logger.debug(f"Candidates: {resp_or_stream.candidates}")
2063
+ if hasattr(resp_or_stream, 'text'):
2064
+ logger.debug(f"Response text attribute: {repr(resp_or_stream.text)[:200] if resp_or_stream.text else 'None'}")
2065
+
2066
  image_markdown = ""
2067
  seen_hashes = set()
2068
  for image in images:
 
2362
  remain = [messages[-1]]
2363
  msgs = _prepare_messages_for_model(
2364
  remain,
2365
+ standard_tools or None,
2366
+ model_tool_choice,
2367
  None,
2368
  False,
2369
  )
 
2429
 
2430
  assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req)
2431
  images = resp_or_stream.images or []
2432
+
2433
+ # Log response details for debugging
2434
+ logger.debug(f"Response: text_len={len(assistant_text)}, images={len(images)}, tool_calls={len(tool_calls)}")
2435
+
2436
+ # Check if response is completely empty (no text, no images, no tool calls)
2437
+ if not assistant_text and not images and not tool_calls:
2438
+ logger.warning("Gemini returned an empty response (no text, images, or tool calls)")
2439
+ # Check if there's an error in the response
2440
+ if hasattr(resp_or_stream, 'candidates') and resp_or_stream.candidates:
2441
+ logger.debug(f"Candidates: {resp_or_stream.candidates}")
2442
+
2443
+ # Check if image generation was forced via tool_choice
2444
+ # Only enforce image requirement if tool_choice explicitly requests image generation
2445
+ # tools: [{"type": "image_generation"}] just declares the tool is available, not mandatory
2446
+ image_generation_forced = (
2447
+ request.tool_choice is not None
2448
+ and isinstance(request.tool_choice, ResponseToolChoice)
2449
+ and request.tool_choice.type == "image_generation"
2450
+ )
2451
+ if image_generation_forced and not images and not assistant_text:
2452
+ logger.warning("Image generation was forced via tool_choice but no images or text were returned.")
2453
+ raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned from forced image generation request.")
2454
 
2455
  contents, img_calls = [], []
2456
  seen_hashes = set()
app/services/client.py CHANGED
@@ -117,6 +117,8 @@ class GeminiClientWrapper(GeminiClient):
117
  auto_refresh: bool = cast(bool, _UNSET),
118
  refresh_interval: float = cast(float, _UNSET),
119
  verbose: bool = cast(bool, _UNSET),
 
 
120
  ) -> None:
121
  """
122
  Inject default configuration values.
@@ -127,15 +129,21 @@ class GeminiClientWrapper(GeminiClient):
127
  refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
128
  verbose = cast(bool, _resolve(verbose, config.verbose))
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  try:
131
- await super().init(
132
- timeout=timeout,
133
- auto_close=auto_close,
134
- close_delay=close_delay,
135
- auto_refresh=auto_refresh,
136
- refresh_interval=refresh_interval,
137
- verbose=verbose,
138
- )
139
  except Exception:
140
  logger.exception(f"Failed to initialize GeminiClient {self.id}")
141
  raise
 
117
  auto_refresh: bool = cast(bool, _UNSET),
118
  refresh_interval: float = cast(float, _UNSET),
119
  verbose: bool = cast(bool, _UNSET),
120
+ watchdog_timeout: float = cast(float, _UNSET),
121
+ **kwargs: Any,
122
  ) -> None:
123
  """
124
  Inject default configuration values.
 
129
  refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
130
  verbose = cast(bool, _resolve(verbose, config.verbose))
131
 
132
+ call_kwargs: dict[str, Any] = {
133
+ "timeout": timeout,
134
+ "auto_close": auto_close,
135
+ "close_delay": close_delay,
136
+ "auto_refresh": auto_refresh,
137
+ "refresh_interval": refresh_interval,
138
+ "verbose": verbose,
139
+ }
140
+ if watchdog_timeout is not _UNSET:
141
+ call_kwargs["watchdog_timeout"] = watchdog_timeout
142
+ if kwargs:
143
+ call_kwargs.update(kwargs)
144
+
145
  try:
146
+ await super().init(**call_kwargs)
 
 
 
 
 
 
 
147
  except Exception:
148
  logger.exception(f"Failed to initialize GeminiClient {self.id}")
149
  raise
config/config.yaml CHANGED
@@ -2,7 +2,7 @@
2
 
3
  server:
4
  host: "0.0.0.0" # Server bind address
5
- port: 7860 # Server port
6
  api_key: "miaolegewang" # API key for authentication (null for no auth)
7
  https:
8
  enabled: false # Enable HTTPS
@@ -19,12 +19,12 @@ cors:
19
  gemini:
20
  clients:
21
  - id: "example-id-1" # Arbitrary client ID
22
 - secure_1psid: "<REDACTED: live session cookie must not appear in a public diff — rotate this credential>"
23
 - secure_1psidts: "<REDACTED: live session cookie must not appear in a public diff — rotate this credential>"
24
  proxy: null # Optional proxy URL (null/empty means direct connection)
25
  timeout: 120 # Init timeout in seconds
26
  auto_refresh: true # Auto-refresh session cookies
27
- refresh_interval: 540 # Refresh interval in seconds
28
  verbose: false # Enable verbose logging for Gemini requests
29
  max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
30
  model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
 
2
 
3
  server:
4
  host: "0.0.0.0" # Server bind address
5
+ port: 8058 # Server port
6
  api_key: "miaolegewang" # API key for authentication (null for no auth)
7
  https:
8
  enabled: false # Enable HTTPS
 
19
  gemini:
20
  clients:
21
  - id: "example-id-1" # Arbitrary client ID
22
 + secure_1psid: "<REDACTED: do not commit live session cookies — load from an environment variable or untracked secrets file>"
23
 + secure_1psidts: "<REDACTED: do not commit live session cookies — load from an environment variable or untracked secrets file>"
24
  proxy: null # Optional proxy URL (null/empty means direct connection)
25
  timeout: 120 # Init timeout in seconds
26
  auto_refresh: true # Auto-refresh session cookies
27
+ refresh_interval: 3600 # Refresh interval in seconds
28
  verbose: false # Enable verbose logging for Gemini requests
29
  max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
30
  model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
run.py CHANGED
@@ -13,9 +13,6 @@ if __name__ == "__main__":
13
  # Setup loguru logging
14
  setup_logging(level=g_config.logging.level)
15
 
16
- # Override port from environment variable (for HuggingFace Spaces compatibility)
17
- port = int(os.getenv("PORT", g_config.server.port))
18
-
19
  # Check HTTPS configuration
20
  if g_config.server.https.enabled:
21
  key_path = g_config.server.https.key_file
@@ -28,20 +25,20 @@ if __name__ == "__main__":
28
  )
29
  sys.exit(1)
30
 
31
- logger.info(f"Starting server at https://{g_config.server.host}:{port} ...")
32
  uvicorn.run(
33
  app,
34
  host=g_config.server.host,
35
- port=port,
36
  log_config=None,
37
  ssl_keyfile=key_path,
38
  ssl_certfile=cert_path,
39
  )
40
  else:
41
- logger.info(f"Starting server at http://{g_config.server.host}:{port} ...")
42
  uvicorn.run(
43
  app,
44
  host=g_config.server.host,
45
- port=port,
46
  log_config=None,
47
  )
 
13
  # Setup loguru logging
14
  setup_logging(level=g_config.logging.level)
15
 
 
 
 
16
  # Check HTTPS configuration
17
  if g_config.server.https.enabled:
18
  key_path = g_config.server.https.key_file
 
25
  )
26
  sys.exit(1)
27
 
28
+ logger.info(f"Starting server at https://{g_config.server.host}:{g_config.server.port} ...")
29
  uvicorn.run(
30
  app,
31
  host=g_config.server.host,
32
+ port=g_config.server.port,
33
  log_config=None,
34
  ssl_keyfile=key_path,
35
  ssl_certfile=cert_path,
36
  )
37
  else:
38
+ logger.info(f"Starting server at http://{g_config.server.host}:{g_config.server.port} ...")
39
  uvicorn.run(
40
  app,
41
  host=g_config.server.host,
42
+ port=g_config.server.port,
43
  log_config=None,
44
  )