Spaces:
Paused
Paused
Commit ·
b2c2a89
1
Parent(s): 917fdb0
update: 同步最新代码与配置
Browse files- app/main.py +1 -2
- app/server/chat.py +90 -9
- app/services/client.py +16 -8
- config/config.yaml +4 -4
- run.py +4 -7
app/main.py
CHANGED
|
@@ -2,7 +2,6 @@ import asyncio
|
|
| 2 |
from contextlib import asynccontextmanager
|
| 3 |
|
| 4 |
from fastapi import FastAPI
|
| 5 |
-
from fastapi.responses import ORJSONResponse
|
| 6 |
from loguru import logger
|
| 7 |
|
| 8 |
from .server.chat import router as chat_router
|
|
@@ -93,7 +92,7 @@ def create_app() -> FastAPI:
|
|
| 93 |
description="OpenAI-compatible API for Gemini Web",
|
| 94 |
version="1.0.0",
|
| 95 |
lifespan=lifespan,
|
| 96 |
-
|
| 97 |
)
|
| 98 |
|
| 99 |
add_cors_middleware(app)
|
|
|
|
| 2 |
from contextlib import asynccontextmanager
|
| 3 |
|
| 4 |
from fastapi import FastAPI
|
|
|
|
| 5 |
from loguru import logger
|
| 6 |
|
| 7 |
from .server.chat import router as chat_router
|
|
|
|
| 92 |
description="OpenAI-compatible API for Gemini Web",
|
| 93 |
version="1.0.0",
|
| 94 |
lifespan=lifespan,
|
| 95 |
+
|
| 96 |
)
|
| 97 |
|
| 98 |
add_cors_middleware(app)
|
app/server/chat.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
|
|
| 1 |
import base64
|
| 2 |
import hashlib
|
| 3 |
import io
|
|
|
|
| 4 |
import re
|
| 5 |
import reprlib
|
| 6 |
import uuid
|
|
@@ -258,7 +260,7 @@ def _process_llm_output(
|
|
| 258 |
# Let's import the cleaning functions? They are internal to client.py.
|
| 259 |
# Better: Use GeminiClientWrapper.extract_output with a dummy ModelOutput.
|
| 260 |
|
| 261 |
-
dummy_output = ModelOutput(
|
| 262 |
# We don't want to re-add thoughts (include_thoughts=False) because we handle them separately?
|
| 263 |
# extract_output puts thoughts in <think> tags if include_thoughts=True.
|
| 264 |
# Here `visible_output` DOES NOT have thoughts (they are in `raw_output_with_think`'s <think> part).
|
|
@@ -280,7 +282,7 @@ def _process_llm_output(
|
|
| 280 |
# Apply to storage_output too?
|
| 281 |
# storage_output usually should match visible_output for history consistency.
|
| 282 |
storage_output = GeminiClientWrapper.extract_output(
|
| 283 |
-
ModelOutput(
|
| 284 |
include_thoughts=False
|
| 285 |
)
|
| 286 |
except Exception as e:
|
|
@@ -780,6 +782,40 @@ async def _send_with_split(
|
|
| 780 |
Includes retry with exponential backoff for transient failures.
|
| 781 |
"""
|
| 782 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 783 |
@retry_with_backoff(
|
| 784 |
max_retries=3,
|
| 785 |
base_delay=1.0,
|
|
@@ -792,7 +828,7 @@ async def _send_with_split(
|
|
| 792 |
) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
|
| 793 |
"""Internal function with retry logic."""
|
| 794 |
if is_stream:
|
| 795 |
-
return
|
| 796 |
return await session.send_message(content, files=file_list)
|
| 797 |
|
| 798 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|
|
@@ -1554,6 +1590,17 @@ def _create_responses_real_streaming_response(
|
|
| 1554 |
if img.url not in seen_urls:
|
| 1555 |
images.append(img)
|
| 1556 |
seen_urls.add(img.url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1557 |
|
| 1558 |
response_contents, image_call_items = [], []
|
| 1559 |
seen_hashes = set()
|
|
@@ -2000,6 +2047,22 @@ async def create_chat_completion(
|
|
| 2000 |
|
| 2001 |
# Process images for OpenAI non-streaming flow
|
| 2002 |
images = resp_or_stream.images or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2003 |
image_markdown = ""
|
| 2004 |
seen_hashes = set()
|
| 2005 |
for image in images:
|
|
@@ -2299,8 +2362,8 @@ async def create_response(
|
|
| 2299 |
remain = [messages[-1]]
|
| 2300 |
msgs = _prepare_messages_for_model(
|
| 2301 |
remain,
|
| 2302 |
-
|
| 2303 |
-
|
| 2304 |
None,
|
| 2305 |
False,
|
| 2306 |
)
|
|
@@ -2366,10 +2429,28 @@ async def create_response(
|
|
| 2366 |
|
| 2367 |
assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req)
|
| 2368 |
images = resp_or_stream.images or []
|
| 2369 |
-
|
| 2370 |
-
|
| 2371 |
-
)
|
| 2372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2373 |
|
| 2374 |
contents, img_calls = [], []
|
| 2375 |
seen_hashes = set()
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
import base64
|
| 3 |
import hashlib
|
| 4 |
import io
|
| 5 |
+
import random
|
| 6 |
import re
|
| 7 |
import reprlib
|
| 8 |
import uuid
|
|
|
|
| 260 |
# Let's import the cleaning functions? They are internal to client.py.
|
| 261 |
# Better: Use GeminiClientWrapper.extract_output with a dummy ModelOutput.
|
| 262 |
|
| 263 |
+
dummy_output = ModelOutput(metadata=[], candidates=[{"rcid": "dummy", "text": visible_output}])
|
| 264 |
# We don't want to re-add thoughts (include_thoughts=False) because we handle them separately?
|
| 265 |
# extract_output puts thoughts in <think> tags if include_thoughts=True.
|
| 266 |
# Here `visible_output` DOES NOT have thoughts (they are in `raw_output_with_think`'s <think> part).
|
|
|
|
| 282 |
# Apply to storage_output too?
|
| 283 |
# storage_output usually should match visible_output for history consistency.
|
| 284 |
storage_output = GeminiClientWrapper.extract_output(
|
| 285 |
+
ModelOutput(metadata=[], candidates=[{"rcid": "dummy", "text": storage_output}]),
|
| 286 |
include_thoughts=False
|
| 287 |
)
|
| 288 |
except Exception as e:
|
|
|
|
| 782 |
Includes retry with exponential backoff for transient failures.
|
| 783 |
"""
|
| 784 |
|
| 785 |
+
async def _stream_with_retry(
    content: str, file_list: list | None
) -> AsyncGenerator[ModelOutput, None]:
    """Stream chunks from ``session.send_message_stream`` with one guarded retry.

    ``session`` and ``logger`` are free variables resolved from the
    surrounding scope. A retry is attempted only when the stream fails
    before any chunk has been handed to the consumer; once output has been
    emitted, retrying would re-send the message and duplicate content, so
    the error is propagated instead.

    Args:
        content: Text forwarded as the first argument of
            ``send_message_stream``.
        file_list: Forwarded as the ``files`` keyword argument; may be
            ``None``.

    Yields:
        Each chunk produced by ``session.send_message_stream``, unchanged
        and in order.

    Raises:
        Exception: The original error is re-raised when the stream fails
            after yielding, when the error text contains ``429``, ``403``
            or ``quota``, or when the retry budget is exhausted.
    """
    # Reduced retries to 1 to minimize ban risk
    max_retries = 1
    for attempt in range(max_retries + 1):
        # Bind the flag before entering ``try`` so the handler can always read
        # it, even when ``send_message_stream`` itself raises immediately.
        has_yielded = False
        try:
            async for chunk in session.send_message_stream(content, files=file_list):
                # Mark before yielding: from this point the consumer has seen
                # output, so an error surfacing at the yield must not trigger
                # a retry that would replay the request.
                has_yielded = True
                yield chunk
            return
        except Exception as e:
            if has_yielded:
                raise

            # Safety check: Do not retry if error indicates auth failure or rate limit
            error_str = str(e).lower()
            if any(marker in error_str for marker in ("429", "403", "quota")):
                raise

            if attempt >= max_retries:
                raise

            # Increased delay with randomness to avoid pattern detection
            base_delay = 3.0
            jitter = random.uniform(0.5, 1.5)
            delay = base_delay + jitter

            # Total attempts is max_retries + 1 (the first try plus the retries).
            logger.warning(f"Stream failed to start (attempt {attempt+1}/{max_retries + 1}). Retrying in {delay:.2f}s. Error: {e}")
            await asyncio.sleep(delay)
|
| 818 |
+
|
| 819 |
@retry_with_backoff(
|
| 820 |
max_retries=3,
|
| 821 |
base_delay=1.0,
|
|
|
|
| 828 |
) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
|
| 829 |
"""Internal function with retry logic."""
|
| 830 |
if is_stream:
|
| 831 |
+
return _stream_with_retry(content, file_list)
|
| 832 |
return await session.send_message(content, files=file_list)
|
| 833 |
|
| 834 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|
|
|
|
| 1590 |
if img.url not in seen_urls:
|
| 1591 |
images.append(img)
|
| 1592 |
seen_urls.add(img.url)
|
| 1593 |
+
|
| 1594 |
+
# Check if image generation was forced via tool_choice (same logic as non-streaming)
|
| 1595 |
+
image_generation_forced = (
|
| 1596 |
+
request.tool_choice is not None
|
| 1597 |
+
and isinstance(request.tool_choice, ResponseToolChoice)
|
| 1598 |
+
and request.tool_choice.type == "image_generation"
|
| 1599 |
+
)
|
| 1600 |
+
if image_generation_forced and not images and not assistant_text:
|
| 1601 |
+
logger.warning("Image generation was forced via tool_choice but no images or text were returned in stream.")
|
| 1602 |
+
yield f"data: {orjson.dumps({**base_event, 'type': 'error', 'error': {'message': 'No images returned from forced image generation request.'}}).decode('utf-8')}\n\n"
|
| 1603 |
+
return
|
| 1604 |
|
| 1605 |
response_contents, image_call_items = [], []
|
| 1606 |
seen_hashes = set()
|
|
|
|
| 2047 |
|
| 2048 |
# Process images for OpenAI non-streaming flow
|
| 2049 |
images = resp_or_stream.images or []
|
| 2050 |
+
|
| 2051 |
+
# Log response details for debugging
|
| 2052 |
+
logger.debug(f"Chat response: text_len={len(visible_output)}, images={len(images)}, tool_calls={len(tool_calls)}")
|
| 2053 |
+
logger.debug(f"Raw response text (first 500 chars): {raw_with_t[:500] if raw_with_t else 'EMPTY'}")
|
| 2054 |
+
|
| 2055 |
+
# Check if response is completely empty
|
| 2056 |
+
if not visible_output and not images and not tool_calls:
|
| 2057 |
+
logger.warning("Gemini returned an empty response for chat completion (no text, images, or tool calls)")
|
| 2058 |
+
# Log more details about the response object
|
| 2059 |
+
logger.debug(f"Response object type: {type(resp_or_stream)}")
|
| 2060 |
+
logger.debug(f"Response has candidates: {hasattr(resp_or_stream, 'candidates')}")
|
| 2061 |
+
if hasattr(resp_or_stream, 'candidates'):
|
| 2062 |
+
logger.debug(f"Candidates: {resp_or_stream.candidates}")
|
| 2063 |
+
if hasattr(resp_or_stream, 'text'):
|
| 2064 |
+
logger.debug(f"Response text attribute: {repr(resp_or_stream.text)[:200] if resp_or_stream.text else 'None'}")
|
| 2065 |
+
|
| 2066 |
image_markdown = ""
|
| 2067 |
seen_hashes = set()
|
| 2068 |
for image in images:
|
|
|
|
| 2362 |
remain = [messages[-1]]
|
| 2363 |
msgs = _prepare_messages_for_model(
|
| 2364 |
remain,
|
| 2365 |
+
standard_tools or None,
|
| 2366 |
+
model_tool_choice,
|
| 2367 |
None,
|
| 2368 |
False,
|
| 2369 |
)
|
|
|
|
| 2429 |
|
| 2430 |
assistant_text, storage_output, tool_calls = _process_llm_output(raw_t, raw_c, struct_req)
|
| 2431 |
images = resp_or_stream.images or []
|
| 2432 |
+
|
| 2433 |
+
# Log response details for debugging
|
| 2434 |
+
logger.debug(f"Response: text_len={len(assistant_text)}, images={len(images)}, tool_calls={len(tool_calls)}")
|
| 2435 |
+
|
| 2436 |
+
# Check if response is completely empty (no text, no images, no tool calls)
|
| 2437 |
+
if not assistant_text and not images and not tool_calls:
|
| 2438 |
+
logger.warning("Gemini returned an empty response (no text, images, or tool calls)")
|
| 2439 |
+
# Check if there's an error in the response
|
| 2440 |
+
if hasattr(resp_or_stream, 'candidates') and resp_or_stream.candidates:
|
| 2441 |
+
logger.debug(f"Candidates: {resp_or_stream.candidates}")
|
| 2442 |
+
|
| 2443 |
+
# Check if image generation was forced via tool_choice
|
| 2444 |
+
# Only enforce image requirement if tool_choice explicitly requests image generation
|
| 2445 |
+
# tools: [{"type": "image_generation"}] just declares the tool is available, not mandatory
|
| 2446 |
+
image_generation_forced = (
|
| 2447 |
+
request.tool_choice is not None
|
| 2448 |
+
and isinstance(request.tool_choice, ResponseToolChoice)
|
| 2449 |
+
and request.tool_choice.type == "image_generation"
|
| 2450 |
+
)
|
| 2451 |
+
if image_generation_forced and not images and not assistant_text:
|
| 2452 |
+
logger.warning("Image generation was forced via tool_choice but no images or text were returned.")
|
| 2453 |
+
raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="No images returned from forced image generation request.")
|
| 2454 |
|
| 2455 |
contents, img_calls = [], []
|
| 2456 |
seen_hashes = set()
|
app/services/client.py
CHANGED
|
@@ -117,6 +117,8 @@ class GeminiClientWrapper(GeminiClient):
|
|
| 117 |
auto_refresh: bool = cast(bool, _UNSET),
|
| 118 |
refresh_interval: float = cast(float, _UNSET),
|
| 119 |
verbose: bool = cast(bool, _UNSET),
|
|
|
|
|
|
|
| 120 |
) -> None:
|
| 121 |
"""
|
| 122 |
Inject default configuration values.
|
|
@@ -127,15 +129,21 @@ class GeminiClientWrapper(GeminiClient):
|
|
| 127 |
refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
|
| 128 |
verbose = cast(bool, _resolve(verbose, config.verbose))
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
try:
|
| 131 |
-
await super().init(
|
| 132 |
-
timeout=timeout,
|
| 133 |
-
auto_close=auto_close,
|
| 134 |
-
close_delay=close_delay,
|
| 135 |
-
auto_refresh=auto_refresh,
|
| 136 |
-
refresh_interval=refresh_interval,
|
| 137 |
-
verbose=verbose,
|
| 138 |
-
)
|
| 139 |
except Exception:
|
| 140 |
logger.exception(f"Failed to initialize GeminiClient {self.id}")
|
| 141 |
raise
|
|
|
|
| 117 |
auto_refresh: bool = cast(bool, _UNSET),
|
| 118 |
refresh_interval: float = cast(float, _UNSET),
|
| 119 |
verbose: bool = cast(bool, _UNSET),
|
| 120 |
+
watchdog_timeout: float = cast(float, _UNSET),
|
| 121 |
+
**kwargs: Any,
|
| 122 |
) -> None:
|
| 123 |
"""
|
| 124 |
Inject default configuration values.
|
|
|
|
| 129 |
refresh_interval = cast(float, _resolve(refresh_interval, config.refresh_interval))
|
| 130 |
verbose = cast(bool, _resolve(verbose, config.verbose))
|
| 131 |
|
| 132 |
+
call_kwargs: dict[str, Any] = {
|
| 133 |
+
"timeout": timeout,
|
| 134 |
+
"auto_close": auto_close,
|
| 135 |
+
"close_delay": close_delay,
|
| 136 |
+
"auto_refresh": auto_refresh,
|
| 137 |
+
"refresh_interval": refresh_interval,
|
| 138 |
+
"verbose": verbose,
|
| 139 |
+
}
|
| 140 |
+
if watchdog_timeout is not _UNSET:
|
| 141 |
+
call_kwargs["watchdog_timeout"] = watchdog_timeout
|
| 142 |
+
if kwargs:
|
| 143 |
+
call_kwargs.update(kwargs)
|
| 144 |
+
|
| 145 |
try:
|
| 146 |
+
await super().init(**call_kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
except Exception:
|
| 148 |
logger.exception(f"Failed to initialize GeminiClient {self.id}")
|
| 149 |
raise
|
config/config.yaml
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
server:
|
| 4 |
host: "0.0.0.0" # Server bind address
|
| 5 |
-
port:
|
| 6 |
api_key: "miaolegewang" # API key for authentication (null for no auth)
|
| 7 |
https:
|
| 8 |
enabled: false # Enable HTTPS
|
|
@@ -19,12 +19,12 @@ cors:
|
|
| 19 |
gemini:
|
| 20 |
clients:
|
| 21 |
- id: "example-id-1" # Arbitrary client ID
|
| 22 |
-
secure_1psid: "g.
|
| 23 |
-
secure_1psidts: "sidts-
|
| 24 |
proxy: null # Optional proxy URL (null/empty means direct connection)
|
| 25 |
timeout: 120 # Init timeout in seconds
|
| 26 |
auto_refresh: true # Auto-refresh session cookies
|
| 27 |
-
refresh_interval:
|
| 28 |
verbose: false # Enable verbose logging for Gemini requests
|
| 29 |
max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
|
| 30 |
model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
|
|
|
|
| 2 |
|
| 3 |
server:
|
| 4 |
host: "0.0.0.0" # Server bind address
|
| 5 |
+
port: 8058 # Server port
|
| 6 |
api_key: "miaolegewang" # API key for authentication (null for no auth)
|
| 7 |
https:
|
| 8 |
enabled: false # Enable HTTPS
|
|
|
|
| 19 |
gemini:
|
| 20 |
clients:
|
| 21 |
- id: "example-id-1" # Arbitrary client ID
|
| 22 |
+
secure_1psid: "g.a0006ghLMuTMAgxpLCPEErGijJmoYdaeoQ8jQvccH014n1gYV_A6_9K7SUzPUYqsFZYeKCFj0QACgYKAe8SARcSFQHGX2MiQREdDmeRiaB-meb6rxWh1RoVAUF8yKqKLXLomJGmcEm899BsY1K90076"
|
| 23 |
+
secure_1psidts: "sidts-CjEB7I_69ImkFO--6ZoqrN9V6ThAw-I__6wzxDni5ELJR7Ia4najVtYeQCmjpk01XH0yEAA"
|
| 24 |
proxy: null # Optional proxy URL (null/empty means direct connection)
|
| 25 |
timeout: 120 # Init timeout in seconds
|
| 26 |
auto_refresh: true # Auto-refresh session cookies
|
| 27 |
+
refresh_interval: 3600 # Refresh interval in seconds
|
| 28 |
verbose: false # Enable verbose logging for Gemini requests
|
| 29 |
max_chars_per_request: 1000000 # Maximum characters Gemini Web accepts per request. Non-pro users might have a lower limit
|
| 30 |
model_strategy: "append" # Strategy: 'append' (default + custom) or 'overwrite' (custom only)
|
run.py
CHANGED
|
@@ -13,9 +13,6 @@ if __name__ == "__main__":
|
|
| 13 |
# Setup loguru logging
|
| 14 |
setup_logging(level=g_config.logging.level)
|
| 15 |
|
| 16 |
-
# Override port from environment variable (for HuggingFace Spaces compatibility)
|
| 17 |
-
port = int(os.getenv("PORT", g_config.server.port))
|
| 18 |
-
|
| 19 |
# Check HTTPS configuration
|
| 20 |
if g_config.server.https.enabled:
|
| 21 |
key_path = g_config.server.https.key_file
|
|
@@ -28,20 +25,20 @@ if __name__ == "__main__":
|
|
| 28 |
)
|
| 29 |
sys.exit(1)
|
| 30 |
|
| 31 |
-
logger.info(f"Starting server at https://{g_config.server.host}:{port} ...")
|
| 32 |
uvicorn.run(
|
| 33 |
app,
|
| 34 |
host=g_config.server.host,
|
| 35 |
-
port=port,
|
| 36 |
log_config=None,
|
| 37 |
ssl_keyfile=key_path,
|
| 38 |
ssl_certfile=cert_path,
|
| 39 |
)
|
| 40 |
else:
|
| 41 |
-
logger.info(f"Starting server at http://{g_config.server.host}:{port} ...")
|
| 42 |
uvicorn.run(
|
| 43 |
app,
|
| 44 |
host=g_config.server.host,
|
| 45 |
-
port=port,
|
| 46 |
log_config=None,
|
| 47 |
)
|
|
|
|
| 13 |
# Setup loguru logging
|
| 14 |
setup_logging(level=g_config.logging.level)
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
# Check HTTPS configuration
|
| 17 |
if g_config.server.https.enabled:
|
| 18 |
key_path = g_config.server.https.key_file
|
|
|
|
| 25 |
)
|
| 26 |
sys.exit(1)
|
| 27 |
|
| 28 |
+
logger.info(f"Starting server at https://{g_config.server.host}:{g_config.server.port} ...")
|
| 29 |
uvicorn.run(
|
| 30 |
app,
|
| 31 |
host=g_config.server.host,
|
| 32 |
+
port=g_config.server.port,
|
| 33 |
log_config=None,
|
| 34 |
ssl_keyfile=key_path,
|
| 35 |
ssl_certfile=cert_path,
|
| 36 |
)
|
| 37 |
else:
|
| 38 |
+
logger.info(f"Starting server at http://{g_config.server.host}:{g_config.server.port} ...")
|
| 39 |
uvicorn.run(
|
| 40 |
app,
|
| 41 |
host=g_config.server.host,
|
| 42 |
+
port=g_config.server.port,
|
| 43 |
log_config=None,
|
| 44 |
)
|