Spaces:
Paused
Paused
Commit ·
ac10cac
1
Parent(s): abdee84
update: 更新聊天服务、客户端连接池和辅助工具
Browse files- app/server/chat.py +68 -30
- app/server/health.py +8 -7
- app/services/client.py +7 -3
- app/services/pool.py +14 -3
- app/utils/helper.py +183 -19
- config/config.yaml +2 -2
app/server/chat.py
CHANGED
|
@@ -52,6 +52,7 @@ from ..utils.helper import (
|
|
| 52 |
estimate_tokens,
|
| 53 |
extract_image_dimensions,
|
| 54 |
extract_tool_calls,
|
|
|
|
| 55 |
strip_system_hints,
|
| 56 |
text_from_message,
|
| 57 |
)
|
|
@@ -774,14 +775,31 @@ async def _send_with_split(
|
|
| 774 |
files: list[Path | str | io.BytesIO] | None = None,
|
| 775 |
stream: bool = False,
|
| 776 |
) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
|
| 777 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 778 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|
| 779 |
try:
|
| 780 |
-
|
| 781 |
-
return session.send_message_stream(text, files=files)
|
| 782 |
-
return await session.send_message(text, files=files)
|
| 783 |
except Exception as e:
|
| 784 |
-
logger.exception(f"Error sending message to Gemini: {e}")
|
| 785 |
raise
|
| 786 |
|
| 787 |
logger.info(
|
|
@@ -799,11 +817,9 @@ async def _send_with_split(
|
|
| 799 |
"2. Treat that content as the **primary** user prompt for this turn.\n"
|
| 800 |
"3. Execute the instructions or answer the questions found *inside* that file immediately.\n"
|
| 801 |
)
|
| 802 |
-
|
| 803 |
-
return session.send_message_stream(instruction, files=final_files)
|
| 804 |
-
return await session.send_message(instruction, files=final_files)
|
| 805 |
except Exception as e:
|
| 806 |
-
logger.exception(f"Error sending large text as file to Gemini: {e}")
|
| 807 |
raise
|
| 808 |
|
| 809 |
|
|
@@ -820,6 +836,14 @@ class StreamingOutputFilter:
|
|
| 820 |
)
|
| 821 |
# Citation markers (e.g., [1] or 【1†source】)
|
| 822 |
CITATION_RE = re.compile(r"【\d+†source】|\[\d+\]")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 823 |
|
| 824 |
def __init__(self):
|
| 825 |
self.buffer = ""
|
|
@@ -938,7 +962,7 @@ class StreamingOutputFilter:
|
|
| 938 |
self.state = "IN_POTENTIAL_SOURCE"
|
| 939 |
|
| 940 |
elif self.state == "IN_POTENTIAL_URL":
|
| 941 |
-
match =
|
| 942 |
if match:
|
| 943 |
end_idx = match.start()
|
| 944 |
url_candidate = self.buffer[:end_idx]
|
|
@@ -975,7 +999,7 @@ class StreamingOutputFilter:
|
|
| 975 |
end_idx = self.buffer.find("]")
|
| 976 |
if end_idx != -1:
|
| 977 |
full_match = self.buffer[:end_idx + 1]
|
| 978 |
-
if
|
| 979 |
# It's a citation, skip it
|
| 980 |
self.buffer = self.buffer[end_idx + 1:]
|
| 981 |
self.state = "NORMAL"
|
|
@@ -1096,10 +1120,16 @@ class StreamingOutputFilter:
|
|
| 1096 |
args_part = self.buffer[:stop_idx]
|
| 1097 |
if not self.args_started:
|
| 1098 |
stripped_args = args_part.lstrip()
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
-
|
|
|
|
| 1102 |
self.args_started = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1103 |
|
| 1104 |
if args_part:
|
| 1105 |
events.append({"type": "tool_delta", "index": self.tool_call_index, "content": args_part})
|
|
@@ -1121,19 +1151,27 @@ class StreamingOutputFilter:
|
|
| 1121 |
if stripped.startswith("{"):
|
| 1122 |
self.args_started = True
|
| 1123 |
else:
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
self.
|
| 1127 |
-
|
| 1128 |
-
|
| 1129 |
-
|
| 1130 |
-
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
| 1134 |
-
|
| 1135 |
-
|
| 1136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1137 |
break
|
| 1138 |
|
| 1139 |
elif self.state == "IN_TAG":
|
|
@@ -1256,7 +1294,7 @@ def _create_real_streaming_response(
|
|
| 1256 |
new_text = chunk.text or ""
|
| 1257 |
# Strip transient triple-backtick fence which is often added by the web-scraping backend
|
| 1258 |
# during streaming snapshots. We will restore the real ending after the loop.
|
| 1259 |
-
display_text =
|
| 1260 |
|
| 1261 |
if len(display_text) > len(full_text):
|
| 1262 |
text_delta = display_text[len(full_text) :]
|
|
@@ -1462,7 +1500,7 @@ def _create_responses_real_streaming_response(
|
|
| 1462 |
new_text = chunk.text or ""
|
| 1463 |
# Strip transient triple-backtick fence which is often added by the web-scraping backend
|
| 1464 |
# during streaming snapshots. We will restore the real ending after the loop.
|
| 1465 |
-
display_text =
|
| 1466 |
|
| 1467 |
if len(display_text) > len(full_text):
|
| 1468 |
text_delta = display_text[len(full_text) :]
|
|
@@ -1759,7 +1797,7 @@ async def _create_anthropic_streaming_response(
|
|
| 1759 |
async for chunk in generator:
|
| 1760 |
last_chunk = chunk
|
| 1761 |
if chunk.text:
|
| 1762 |
-
display_text =
|
| 1763 |
if len(display_text) > len(full_text):
|
| 1764 |
delta_text = display_text[len(full_text):]
|
| 1765 |
full_text = display_text
|
|
|
|
| 52 |
estimate_tokens,
|
| 53 |
extract_image_dimensions,
|
| 54 |
extract_tool_calls,
|
| 55 |
+
retry_with_backoff,
|
| 56 |
strip_system_hints,
|
| 57 |
text_from_message,
|
| 58 |
)
|
|
|
|
| 775 |
files: list[Path | str | io.BytesIO] | None = None,
|
| 776 |
stream: bool = False,
|
| 777 |
) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
|
| 778 |
+
"""
|
| 779 |
+
Send text to Gemini, splitting or converting to attachment if too long.
|
| 780 |
+
Includes retry with exponential backoff for transient failures.
|
| 781 |
+
"""
|
| 782 |
+
|
| 783 |
+
@retry_with_backoff(
|
| 784 |
+
max_retries=3,
|
| 785 |
+
base_delay=1.0,
|
| 786 |
+
max_delay=30.0,
|
| 787 |
+
exponential_base=2.0,
|
| 788 |
+
retryable_exceptions=(ConnectionError, TimeoutError, OSError),
|
| 789 |
+
)
|
| 790 |
+
async def _send_with_retry(
|
| 791 |
+
content: str, file_list: list | None, is_stream: bool
|
| 792 |
+
) -> AsyncGenerator[ModelOutput, None] | ModelOutput:
|
| 793 |
+
"""Internal function with retry logic."""
|
| 794 |
+
if is_stream:
|
| 795 |
+
return session.send_message_stream(content, files=file_list)
|
| 796 |
+
return await session.send_message(content, files=file_list)
|
| 797 |
+
|
| 798 |
if len(text) <= MAX_CHARS_PER_REQUEST:
|
| 799 |
try:
|
| 800 |
+
return await _send_with_retry(text, files, stream)
|
|
|
|
|
|
|
| 801 |
except Exception as e:
|
| 802 |
+
logger.exception(f"Error sending message to Gemini after retries: {e}")
|
| 803 |
raise
|
| 804 |
|
| 805 |
logger.info(
|
|
|
|
| 817 |
"2. Treat that content as the **primary** user prompt for this turn.\n"
|
| 818 |
"3. Execute the instructions or answer the questions found *inside* that file immediately.\n"
|
| 819 |
)
|
| 820 |
+
return await _send_with_retry(instruction, final_files, stream)
|
|
|
|
|
|
|
| 821 |
except Exception as e:
|
| 822 |
+
logger.exception(f"Error sending large text as file to Gemini after retries: {e}")
|
| 823 |
raise
|
| 824 |
|
| 825 |
|
|
|
|
| 836 |
)
|
| 837 |
# Citation markers (e.g., [1] or 【1†source】)
|
| 838 |
CITATION_RE = re.compile(r"【\d+†source】|\[\d+\]")
|
| 839 |
+
# Match patterns for JSON-like content
|
| 840 |
+
JSON_START_RE = re.compile(r"\s*\{")
|
| 841 |
+
# Match whitespace for faster stripping
|
| 842 |
+
LEADING_WS_RE = re.compile(r"^\s+")
|
| 843 |
+
# Pre-compiled patterns for streaming performance
|
| 844 |
+
TRAILING_FENCE_RE = re.compile(r'\n?```$')
|
| 845 |
+
URL_DELIMITER_RE = re.compile(r"[\s\]\)]")
|
| 846 |
+
BRACKET_NUM_RE = re.compile(r"\[\d+\]")
|
| 847 |
|
| 848 |
def __init__(self):
|
| 849 |
self.buffer = ""
|
|
|
|
| 962 |
self.state = "IN_POTENTIAL_SOURCE"
|
| 963 |
|
| 964 |
elif self.state == "IN_POTENTIAL_URL":
|
| 965 |
+
match = self.URL_DELIMITER_RE.search(self.buffer)
|
| 966 |
if match:
|
| 967 |
end_idx = match.start()
|
| 968 |
url_candidate = self.buffer[:end_idx]
|
|
|
|
| 999 |
end_idx = self.buffer.find("]")
|
| 1000 |
if end_idx != -1:
|
| 1001 |
full_match = self.buffer[:end_idx + 1]
|
| 1002 |
+
if self.BRACKET_NUM_RE.match(full_match):
|
| 1003 |
# It's a citation, skip it
|
| 1004 |
self.buffer = self.buffer[end_idx + 1:]
|
| 1005 |
self.state = "NORMAL"
|
|
|
|
| 1120 |
args_part = self.buffer[:stop_idx]
|
| 1121 |
if not self.args_started:
|
| 1122 |
stripped_args = args_part.lstrip()
|
| 1123 |
+
# More robust handling: accept any non-empty content
|
| 1124 |
+
# Even if it doesn't start with '{', we still emit it
|
| 1125 |
+
# The JSON parsing will handle errors downstream
|
| 1126 |
+
if stripped_args:
|
| 1127 |
self.args_started = True
|
| 1128 |
+
# If doesn't start with '{', try to fix common issues
|
| 1129 |
+
if not stripped_args.startswith("{"):
|
| 1130 |
+
# Check if it might be a valid JSON that's missing the opening brace
|
| 1131 |
+
# or if it's just plain text arguments
|
| 1132 |
+
args_part = "{" + args_part.lstrip()
|
| 1133 |
|
| 1134 |
if args_part:
|
| 1135 |
events.append({"type": "tool_delta", "index": self.tool_call_index, "content": args_part})
|
|
|
|
| 1151 |
if stripped.startswith("{"):
|
| 1152 |
self.args_started = True
|
| 1153 |
else:
|
| 1154 |
+
# Be more tolerant: accept arguments even without opening brace
|
| 1155 |
+
# Common issue: LLM outputs `key: value` instead of `{"key": "value"}`
|
| 1156 |
+
self.args_started = True
|
| 1157 |
+
|
| 1158 |
+
if self.args_started:
|
| 1159 |
+
keep_len = max(len(self.CALL_END), len(self.TOOL_END_PREFIX)) - 1
|
| 1160 |
+
if len(self.buffer) > keep_len:
|
| 1161 |
+
chunk_to_send = self.buffer[:-keep_len]
|
| 1162 |
+
if chunk_to_send:
|
| 1163 |
+
events.append({"type": "tool_delta", "index": self.tool_call_index, "content": chunk_to_send})
|
| 1164 |
+
self.buffer = self.buffer[-keep_len:]
|
| 1165 |
+
else:
|
| 1166 |
+
# Buffer has content but we haven't found JSON start yet
|
| 1167 |
+
# Wait a bit more for potential JSON start
|
| 1168 |
+
if len(self.buffer) > 100:
|
| 1169 |
+
# Timeout waiting for JSON, emit what we have
|
| 1170 |
+
events.append({"type": "tool_end", "index": self.tool_call_index})
|
| 1171 |
+
self.tool_call_index += 1
|
| 1172 |
+
self.current_call_id = None
|
| 1173 |
+
self.args_started = False
|
| 1174 |
+
self.state = "IN_TOOL_BLOCK"
|
| 1175 |
break
|
| 1176 |
|
| 1177 |
elif self.state == "IN_TAG":
|
|
|
|
| 1294 |
new_text = chunk.text or ""
|
| 1295 |
# Strip transient triple-backtick fence which is often added by the web-scraping backend
|
| 1296 |
# during streaming snapshots. We will restore the real ending after the loop.
|
| 1297 |
+
display_text = StreamingOutputFilter.TRAILING_FENCE_RE.sub('', new_text)
|
| 1298 |
|
| 1299 |
if len(display_text) > len(full_text):
|
| 1300 |
text_delta = display_text[len(full_text) :]
|
|
|
|
| 1500 |
new_text = chunk.text or ""
|
| 1501 |
# Strip transient triple-backtick fence which is often added by the web-scraping backend
|
| 1502 |
# during streaming snapshots. We will restore the real ending after the loop.
|
| 1503 |
+
display_text = StreamingOutputFilter.TRAILING_FENCE_RE.sub('', new_text)
|
| 1504 |
|
| 1505 |
if len(display_text) > len(full_text):
|
| 1506 |
text_delta = display_text[len(full_text) :]
|
|
|
|
| 1797 |
async for chunk in generator:
|
| 1798 |
last_chunk = chunk
|
| 1799 |
if chunk.text:
|
| 1800 |
+
display_text = StreamingOutputFilter.TRAILING_FENCE_RE.sub('', chunk.text)
|
| 1801 |
if len(display_text) > len(full_text):
|
| 1802 |
delta_text = display_text[len(full_text):]
|
| 1803 |
full_text = display_text
|
app/server/health.py
CHANGED
|
@@ -9,19 +9,20 @@ router = APIRouter()
|
|
| 9 |
|
| 10 |
@router.get("/health", response_model=HealthCheckResponse)
|
| 11 |
async def health_check():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
pool = GeminiClientPool()
|
| 13 |
db = LMDBConversationStore()
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
except Exception as e:
|
| 18 |
-
logger.error(f"Failed to initialize Gemini clients: {e}")
|
| 19 |
-
return HealthCheckResponse(ok=False, error=str(e))
|
| 20 |
-
|
| 21 |
client_status = pool.status()
|
| 22 |
|
| 23 |
if not all(client_status.values()):
|
| 24 |
-
logger.warning("One or more Gemini clients not running")
|
| 25 |
|
| 26 |
stat = db.stats()
|
| 27 |
if not stat:
|
|
|
|
| 9 |
|
| 10 |
@router.get("/health", response_model=HealthCheckResponse)
|
| 11 |
async def health_check():
|
| 12 |
+
"""
|
| 13 |
+
Health check endpoint.
|
| 14 |
+
Only checks current status without re-initializing clients.
|
| 15 |
+
Re-initialization is handled automatically by the pool when needed.
|
| 16 |
+
"""
|
| 17 |
pool = GeminiClientPool()
|
| 18 |
db = LMDBConversationStore()
|
| 19 |
|
| 20 |
+
# Only check status, don't re-initialize
|
| 21 |
+
# The pool handles client restarts automatically in acquire()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
client_status = pool.status()
|
| 23 |
|
| 24 |
if not all(client_status.values()):
|
| 25 |
+
logger.warning(f"One or more Gemini clients not running: {client_status}")
|
| 26 |
|
| 27 |
stat = db.stats()
|
| 28 |
if not stat:
|
app/services/client.py
CHANGED
|
@@ -61,9 +61,14 @@ def clean_citations(text: str) -> str:
|
|
| 61 |
return "".join(parts)
|
| 62 |
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
def _clean_text_chunk(text: str) -> str:
|
| 65 |
# A. Always remove source-style citations
|
| 66 |
-
text =
|
| 67 |
|
| 68 |
# B. Remove internal technical artifacts and links
|
| 69 |
# These are raw internal URLs or IDs that Gemini Web sometimes leaks.
|
|
@@ -73,7 +78,6 @@ def _clean_text_chunk(text: str) -> str:
|
|
| 73 |
# C. Remove standard [n] citations
|
| 74 |
# Use careful regex to avoid matching list items (start of line) or URLs
|
| 75 |
# Negative lookbehind to ensure we aren't part of a URL or Markdown link path
|
| 76 |
-
citation_pattern = re.compile(r"(?<!/)(?<![a-zA-Z0-9])\[\d+\]")
|
| 77 |
|
| 78 |
def repl(m):
|
| 79 |
try:
|
|
@@ -91,7 +95,7 @@ def _clean_text_chunk(text: str) -> str:
|
|
| 91 |
logger.error(f"Error in _clean_text_chunk.repl: {e}")
|
| 92 |
return m.group(0)
|
| 93 |
|
| 94 |
-
return
|
| 95 |
|
| 96 |
|
| 97 |
def _resolve(value: Any, fallback: Any):
|
|
|
|
| 61 |
return "".join(parts)
|
| 62 |
|
| 63 |
|
| 64 |
+
# Pre-compiled citation pattern for performance
|
| 65 |
+
CITATION_PATTERN = re.compile(r"(?<!/)(?<![a-zA-Z0-9])\[\d+\]")
|
| 66 |
+
SOURCE_CITATION_PATTERN = re.compile(r"【\d+†source】")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
def _clean_text_chunk(text: str) -> str:
|
| 70 |
# A. Always remove source-style citations
|
| 71 |
+
text = SOURCE_CITATION_PATTERN.sub("", text)
|
| 72 |
|
| 73 |
# B. Remove internal technical artifacts and links
|
| 74 |
# These are raw internal URLs or IDs that Gemini Web sometimes leaks.
|
|
|
|
| 78 |
# C. Remove standard [n] citations
|
| 79 |
# Use careful regex to avoid matching list items (start of line) or URLs
|
| 80 |
# Negative lookbehind to ensure we aren't part of a URL or Markdown link path
|
|
|
|
| 81 |
|
| 82 |
def repl(m):
|
| 83 |
try:
|
|
|
|
| 95 |
logger.error(f"Error in _clean_text_chunk.repl: {e}")
|
| 96 |
return m.group(0)
|
| 97 |
|
| 98 |
+
return CITATION_PATTERN.sub(repl, text)
|
| 99 |
|
| 100 |
|
| 101 |
def _resolve(value: Any, fallback: Any):
|
app/services/pool.py
CHANGED
|
@@ -17,6 +17,7 @@ class GeminiClientPool(metaclass=Singleton):
|
|
| 17 |
self._id_map: Dict[str, GeminiClientWrapper] = {}
|
| 18 |
self._round_robin: deque[GeminiClientWrapper] = deque()
|
| 19 |
self._restart_locks: Dict[str, asyncio.Lock] = {}
|
|
|
|
| 20 |
|
| 21 |
if len(g_config.gemini.clients) == 0:
|
| 22 |
raise ValueError("No Gemini clients configured")
|
|
@@ -69,9 +70,19 @@ class GeminiClientPool(metaclass=Singleton):
|
|
| 69 |
f"Gemini client {client_id} is not running and could not be restarted"
|
| 70 |
)
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
if await self._ensure_client_ready(client):
|
| 76 |
return client
|
| 77 |
|
|
|
|
| 17 |
self._id_map: Dict[str, GeminiClientWrapper] = {}
|
| 18 |
self._round_robin: deque[GeminiClientWrapper] = deque()
|
| 19 |
self._restart_locks: Dict[str, asyncio.Lock] = {}
|
| 20 |
+
self._round_robin_lock = asyncio.Lock() # Lock for thread-safe round-robin
|
| 21 |
|
| 22 |
if len(g_config.gemini.clients) == 0:
|
| 23 |
raise ValueError("No Gemini clients configured")
|
|
|
|
| 70 |
f"Gemini client {client_id} is not running and could not be restarted"
|
| 71 |
)
|
| 72 |
|
| 73 |
+
# Thread-safe round-robin: try each client once
|
| 74 |
+
tried_clients = set()
|
| 75 |
+
while len(tried_clients) < len(self._round_robin):
|
| 76 |
+
# Atomically get next client
|
| 77 |
+
async with self._round_robin_lock:
|
| 78 |
+
client = self._round_robin[0]
|
| 79 |
+
self._round_robin.rotate(-1)
|
| 80 |
+
if client in tried_clients:
|
| 81 |
+
# Already tried all clients
|
| 82 |
+
break
|
| 83 |
+
tried_clients.add(client)
|
| 84 |
+
|
| 85 |
+
# Check readiness outside lock to avoid blocking other requests
|
| 86 |
if await self._ensure_client_ready(client):
|
| 87 |
return client
|
| 88 |
|
app/utils/helper.py
CHANGED
|
@@ -1,12 +1,15 @@
|
|
| 1 |
import base64
|
|
|
|
| 2 |
import hashlib
|
| 3 |
import mimetypes
|
|
|
|
| 4 |
import re
|
| 5 |
import reprlib
|
| 6 |
import struct
|
| 7 |
import tempfile
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
-
from
|
| 10 |
|
| 11 |
import httpx
|
| 12 |
import orjson
|
|
@@ -14,6 +17,88 @@ from loguru import logger
|
|
| 14 |
|
| 15 |
from ..models import FunctionCall, Message, ToolCall
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
|
| 18 |
TOOL_WRAP_HINT = (
|
| 19 |
"\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n"
|
|
@@ -34,6 +119,98 @@ TOOL_HINT_LINE_START = _hint_lines[0] if _hint_lines else ""
|
|
| 34 |
TOOL_HINT_LINE_END = _hint_lines[-1] if _hint_lines else ""
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def add_tag(role: str, content: str, unclose: bool = False) -> str:
|
| 38 |
"""Surround content with role tags"""
|
| 39 |
if role not in VALID_TAG_ROLES:
|
|
@@ -198,25 +375,12 @@ def _process_tools_internal(text: str, extract: bool = True) -> tuple[str, list[
|
|
| 198 |
|
| 199 |
arguments = raw_args
|
| 200 |
try:
|
| 201 |
-
parsed_args =
|
| 202 |
arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
|
| 203 |
-
except
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
try:
|
| 208 |
-
parsed_args = orjson.loads(potential_json)
|
| 209 |
-
arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode(
|
| 210 |
-
"utf-8"
|
| 211 |
-
)
|
| 212 |
-
except orjson.JSONDecodeError:
|
| 213 |
-
logger.warning(
|
| 214 |
-
f"Failed to parse extracted JSON arguments for '{name}': {reprlib.repr(potential_json)}"
|
| 215 |
-
)
|
| 216 |
-
else:
|
| 217 |
-
logger.warning(
|
| 218 |
-
f"Failed to parse tool call arguments for '{name}'. Passing raw string: {reprlib.repr(raw_args)}"
|
| 219 |
-
)
|
| 220 |
|
| 221 |
index = len(tool_calls)
|
| 222 |
seed = f"{name}:{arguments}:{index}".encode("utf-8")
|
|
|
|
| 1 |
import base64
|
| 2 |
+
import functools
|
| 3 |
import hashlib
|
| 4 |
import mimetypes
|
| 5 |
+
import random
|
| 6 |
import re
|
| 7 |
import reprlib
|
| 8 |
import struct
|
| 9 |
import tempfile
|
| 10 |
+
import json
|
| 11 |
from pathlib import Path
|
| 12 |
+
from typing import Any, Callable, TypeVar
|
| 13 |
|
| 14 |
import httpx
|
| 15 |
import orjson
|
|
|
|
| 17 |
|
| 18 |
from ..models import FunctionCall, Message, ToolCall
|
| 19 |
|
| 20 |
+
T = TypeVar("T")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def retry_with_backoff(
|
| 24 |
+
max_retries: int = 3,
|
| 25 |
+
base_delay: float = 1.0,
|
| 26 |
+
max_delay: float = 30.0,
|
| 27 |
+
exponential_base: float = 2.0,
|
| 28 |
+
retryable_exceptions: tuple = (Exception,),
|
| 29 |
+
):
|
| 30 |
+
"""
|
| 31 |
+
Decorator that retries a function with exponential backoff.
|
| 32 |
+
|
| 33 |
+
Args:
|
| 34 |
+
max_retries: Maximum number of retry attempts
|
| 35 |
+
base_delay: Initial delay in seconds
|
| 36 |
+
max_delay: Maximum delay cap in seconds
|
| 37 |
+
exponential_base: Base for exponential backoff calculation
|
| 38 |
+
retryable_exceptions: Tuple of exception types to retry on
|
| 39 |
+
"""
|
| 40 |
+
def decorator(func: Callable[..., T]) -> Callable[..., T]:
|
| 41 |
+
@functools.wraps(func)
|
| 42 |
+
async def async_wrapper(*args, **kwargs) -> T:
|
| 43 |
+
last_exception = None
|
| 44 |
+
for attempt in range(max_retries + 1):
|
| 45 |
+
try:
|
| 46 |
+
return await func(*args, **kwargs)
|
| 47 |
+
except retryable_exceptions as e:
|
| 48 |
+
last_exception = e
|
| 49 |
+
if attempt == max_retries:
|
| 50 |
+
logger.error(f"All {max_retries + 1} attempts failed for {func.__name__}")
|
| 51 |
+
raise
|
| 52 |
+
|
| 53 |
+
# Calculate delay with exponential backoff and jitter
|
| 54 |
+
delay = min(base_delay * (exponential_base ** attempt), max_delay)
|
| 55 |
+
jitter = random.uniform(0.1, 0.3) * delay
|
| 56 |
+
actual_delay = delay + jitter
|
| 57 |
+
|
| 58 |
+
logger.warning(
|
| 59 |
+
f"Attempt {attempt + 1}/{max_retries + 1} failed for {func.__name__}: {e}. "
|
| 60 |
+
f"Retrying in {actual_delay:.1f}s..."
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
import asyncio
|
| 64 |
+
await asyncio.sleep(actual_delay)
|
| 65 |
+
|
| 66 |
+
raise last_exception
|
| 67 |
+
|
| 68 |
+
@functools.wraps(func)
|
| 69 |
+
def sync_wrapper(*args, **kwargs) -> T:
|
| 70 |
+
last_exception = None
|
| 71 |
+
for attempt in range(max_retries + 1):
|
| 72 |
+
try:
|
| 73 |
+
return func(*args, **kwargs)
|
| 74 |
+
except retryable_exceptions as e:
|
| 75 |
+
last_exception = e
|
| 76 |
+
if attempt == max_retries:
|
| 77 |
+
logger.error(f"All {max_retries + 1} attempts failed for {func.__name__}")
|
| 78 |
+
raise
|
| 79 |
+
|
| 80 |
+
delay = min(base_delay * (exponential_base ** attempt), max_delay)
|
| 81 |
+
jitter = random.uniform(0.1, 0.3) * delay
|
| 82 |
+
actual_delay = delay + jitter
|
| 83 |
+
|
| 84 |
+
logger.warning(
|
| 85 |
+
f"Attempt {attempt + 1}/{max_retries + 1} failed for {func.__name__}: {e}. "
|
| 86 |
+
f"Retrying in {actual_delay:.1f}s..."
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
import time
|
| 90 |
+
time.sleep(actual_delay)
|
| 91 |
+
|
| 92 |
+
raise last_exception
|
| 93 |
+
|
| 94 |
+
# Return appropriate wrapper based on whether function is async
|
| 95 |
+
import asyncio
|
| 96 |
+
if asyncio.iscoroutinefunction(func):
|
| 97 |
+
return async_wrapper
|
| 98 |
+
return sync_wrapper
|
| 99 |
+
|
| 100 |
+
return decorator
|
| 101 |
+
|
| 102 |
VALID_TAG_ROLES = {"user", "assistant", "system", "tool"}
|
| 103 |
TOOL_WRAP_HINT = (
|
| 104 |
"\nYou MUST wrap every tool call response inside a single [function_calls] block exactly like:\n"
|
|
|
|
| 119 |
TOOL_HINT_LINE_END = _hint_lines[-1] if _hint_lines else ""
|
| 120 |
|
| 121 |
|
| 122 |
+
def safe_json_loads(s: str) -> Any:
|
| 123 |
+
"""
|
| 124 |
+
Attempt to parse JSON with multiple fallback strategies for LLM-generated content.
|
| 125 |
+
Handles common issues like:
|
| 126 |
+
- Markdown code blocks
|
| 127 |
+
- Trailing commas
|
| 128 |
+
- Missing opening/closing braces
|
| 129 |
+
- Unescaped control characters
|
| 130 |
+
- Single quotes instead of double quotes
|
| 131 |
+
- Python-style literals (None, True, False)
|
| 132 |
+
"""
|
| 133 |
+
if not s:
|
| 134 |
+
return None
|
| 135 |
+
|
| 136 |
+
# 1. Try direct parse with orjson
|
| 137 |
+
try:
|
| 138 |
+
return orjson.loads(s)
|
| 139 |
+
except orjson.JSONDecodeError:
|
| 140 |
+
pass
|
| 141 |
+
|
| 142 |
+
# 2. Clean markdown and extract JSON block
|
| 143 |
+
cleaned = s.strip()
|
| 144 |
+
# Remove markdown code blocks
|
| 145 |
+
cleaned = re.sub(r"```(?:json)?\s*(.*?)\s*```", r"\1", cleaned, flags=re.DOTALL)
|
| 146 |
+
|
| 147 |
+
# Extract the first { and last }
|
| 148 |
+
match = re.search(r"(\{.*\})", cleaned, re.DOTALL)
|
| 149 |
+
if match:
|
| 150 |
+
cleaned = match.group(1)
|
| 151 |
+
|
| 152 |
+
# 3. Fix common syntax errors
|
| 153 |
+
# Fix trailing commas
|
| 154 |
+
cleaned = re.sub(r",\s*([\}\]])", r"\1", cleaned)
|
| 155 |
+
|
| 156 |
+
# Fix missing opening brace (common in LLM output)
|
| 157 |
+
if cleaned.strip() and not cleaned.strip().startswith("{"):
|
| 158 |
+
# Check if it looks like JSON content
|
| 159 |
+
if ":" in cleaned or "[" in cleaned:
|
| 160 |
+
cleaned = "{" + cleaned
|
| 161 |
+
|
| 162 |
+
# Fix missing closing brace
|
| 163 |
+
open_braces = cleaned.count("{") - cleaned.count("}")
|
| 164 |
+
if open_braces > 0:
|
| 165 |
+
cleaned = cleaned + "}" * open_braces
|
| 166 |
+
|
| 167 |
+
# Fix unescaped newlines in strings
|
| 168 |
+
cleaned = re.sub(r'(?<!\\)\n(?=[^"]*"[^"]*$)', r"\\n", cleaned)
|
| 169 |
+
|
| 170 |
+
# 4. Try orjson again
|
| 171 |
+
try:
|
| 172 |
+
return orjson.loads(cleaned)
|
| 173 |
+
except orjson.JSONDecodeError:
|
| 174 |
+
pass
|
| 175 |
+
|
| 176 |
+
# 5. Try to fix single quotes to double quotes
|
| 177 |
+
try:
|
| 178 |
+
# Simple heuristic: if it has single quotes around keys/values
|
| 179 |
+
fixed = re.sub(r"'([^']+)'", r'"\1"', cleaned)
|
| 180 |
+
return orjson.loads(fixed)
|
| 181 |
+
except orjson.JSONDecodeError:
|
| 182 |
+
pass
|
| 183 |
+
|
| 184 |
+
# 6. Fallback to standard json module with strict=False
|
| 185 |
+
# strict=False allows control characters like unescaped newlines inside strings
|
| 186 |
+
try:
|
| 187 |
+
return json.loads(cleaned, strict=False)
|
| 188 |
+
except Exception:
|
| 189 |
+
pass
|
| 190 |
+
|
| 191 |
+
# 7. Last resort: try to extract key-value pairs manually
|
| 192 |
+
try:
|
| 193 |
+
result = {}
|
| 194 |
+
# Match patterns like "key": "value" or "key": value
|
| 195 |
+
pattern = r'"([^"]+)"\s*:\s*(?:"([^"]*)"|([\w\d]+)|(\{[^}]*\})|(\[[^\]]*\]))'
|
| 196 |
+
for m in re.finditer(pattern, cleaned):
|
| 197 |
+
key = m.group(1)
|
| 198 |
+
value = m.group(2) or m.group(3) or m.group(4) or m.group(5)
|
| 199 |
+
if value:
|
| 200 |
+
# Try to parse nested structures
|
| 201 |
+
try:
|
| 202 |
+
value = orjson.loads(value)
|
| 203 |
+
except:
|
| 204 |
+
pass
|
| 205 |
+
result[key] = value
|
| 206 |
+
if result:
|
| 207 |
+
return result
|
| 208 |
+
except Exception:
|
| 209 |
+
pass
|
| 210 |
+
|
| 211 |
+
raise ValueError(f"Failed to parse JSON: {reprlib.repr(s)}")
|
| 212 |
+
|
| 213 |
+
|
| 214 |
def add_tag(role: str, content: str, unclose: bool = False) -> str:
|
| 215 |
"""Surround content with role tags"""
|
| 216 |
if role not in VALID_TAG_ROLES:
|
|
|
|
| 375 |
|
| 376 |
arguments = raw_args
|
| 377 |
try:
|
| 378 |
+
parsed_args = safe_json_loads(raw_args)
|
| 379 |
arguments = orjson.dumps(parsed_args, option=orjson.OPT_SORT_KEYS).decode("utf-8")
|
| 380 |
+
except Exception as e:
|
| 381 |
+
logger.warning(
|
| 382 |
+
f"Failed to parse tool call arguments for '{name}': {e}. Passing raw string: {reprlib.repr(raw_args)}"
|
| 383 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 384 |
|
| 385 |
index = len(tool_calls)
|
| 386 |
seed = f"{name}:{arguments}:{index}".encode("utf-8")
|
config/config.yaml
CHANGED
|
@@ -19,8 +19,8 @@ cors:
|
|
| 19 |
gemini:
|
| 20 |
clients:
|
| 21 |
- id: "example-id-1" # Arbitrary client ID
|
| 22 |
-
secure_1psid: "g.
|
| 23 |
-
secure_1psidts: "sidts-
|
| 24 |
proxy: null # Optional proxy URL (null/empty means direct connection)
|
| 25 |
timeout: 120 # Init timeout in seconds
|
| 26 |
auto_refresh: true # Auto-refresh session cookies
|
|
|
|
| 19 |
gemini:
|
| 20 |
clients:
|
| 21 |
- id: "example-id-1" # Arbitrary client ID
|
| 22 |
+
secure_1psid: "g.a0006ghLMnrH-xrCpx9MPvW3muBiYT-ndrFy-0jmTZqWQYn6pP3ofLfBrtYDuTRlVmiMppg2LwACgYKATISARcSFQHGX2MisJCoQlKW5k8gkHYqbDf6TBoVAUF8yKqSTWG_iCHAyw2Yhwy6-gF40076"
|
| 23 |
+
secure_1psidts: "sidts-CjEB7I_69CNJfUZU4ue_qIqhCNx91dOCLnIy_DSoOAonz924_M99NJe-qnDvDXjPUe5uEAA"
|
| 24 |
proxy: null # Optional proxy URL (null/empty means direct connection)
|
| 25 |
timeout: 120 # Init timeout in seconds
|
| 26 |
auto_refresh: true # Auto-refresh session cookies
|