f2d90b38 committed on
Commit
a98fd5a
Β·
verified Β·
1 Parent(s): 0752c0b

Upload 6 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base to keep the image small.
FROM python:3.11-slim

WORKDIR /app

# NOTE(review): dependencies are unpinned — builds float to the latest
# httpx/fastapi/uvicorn; consider pinning versions for reproducibility.
RUN pip install --no-cache-dir httpx fastapi uvicorn

COPY main.py openai.py ./

# 7860 is the conventional Hugging Face Spaces port.
EXPOSE 7860

# openai:app = the FastAPI instance in openai.py; worker count is
# configurable via UVICORN_WORKERS (defaults to 1).
CMD ["sh", "-c", "python -m uvicorn openai:app --host 0.0.0.0 --port 7860 --workers ${UVICORN_WORKERS:-1}"]
__pycache__/main.cpython-313.pyc ADDED
Binary file (18.5 kB). View file
 
__pycache__/openai.cpython-313.pyc ADDED
Binary file (17 kB). View file
 
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
main.py ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """chat.z.ai reverse-engineered Python client."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import base64
7
+ import hashlib
8
+ import hmac
9
+ import json
10
+ import os
11
+ import time
12
+ import uuid
13
+ from datetime import datetime, timezone, timedelta
14
+ from urllib.parse import urlencode
15
+
16
+ import httpx
17
+
18
+ BASE_URL = "https://chat.z.ai"
19
+ HMAC_SECRET = "key-@@@@)))()((9))-xxxx&&&%%%%%"
20
+ FE_VERSION = "prod-fe-1.0.231"
21
+ CLIENT_VERSION = "0.0.1"
22
+ DEFAULT_MODEL = "glm-5"
23
+ USER_AGENT = (
24
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
25
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
26
+ "Chrome/144.0.0.0 Safari/537.36"
27
+ )
28
+
29
+
30
+ def _env_float(name: str, default: float) -> float:
31
+ raw = os.getenv(name)
32
+ if raw is None:
33
+ return default
34
+ try:
35
+ return float(raw)
36
+ except ValueError:
37
+ return default
38
+
39
+
40
+ def _env_int(name: str, default: int) -> int:
41
+ raw = os.getenv(name)
42
+ if raw is None:
43
+ return default
44
+ try:
45
+ return int(raw)
46
+ except ValueError:
47
+ return default
48
+
49
+
50
+ def _env_bool(name: str, default: bool = False) -> bool:
51
+ raw = os.getenv(name)
52
+ if raw is None:
53
+ return default
54
+ return raw.strip().lower() in {"1", "true", "yes", "on"}
55
+
56
+
57
# HTTP client tuning, overridable via ZAI_HTTP_* env vars.
# Each value is clamped to a sane lower bound.
HTTP_TIMEOUT_SECONDS = max(1.0, _env_float("ZAI_HTTP_TIMEOUT_SECONDS", 60.0))
HTTP_CONNECT_TIMEOUT_SECONDS = max(
    1.0, _env_float("ZAI_HTTP_CONNECT_TIMEOUT_SECONDS", 10.0)
)
HTTP_MAX_CONNECTIONS = max(1, _env_int("ZAI_HTTP_MAX_CONNECTIONS", 512))
# Keepalive pool can never exceed the total connection cap.
HTTP_MAX_KEEPALIVE_CONNECTIONS = max(
    1,
    min(
        HTTP_MAX_CONNECTIONS,
        _env_int("ZAI_HTTP_MAX_KEEPALIVE_CONNECTIONS", 256),
    ),
)
HTTP_KEEPALIVE_EXPIRY_SECONDS = max(
    1.0, _env_float("ZAI_HTTP_KEEPALIVE_EXPIRY_SECONDS", 30.0)
)
# HTTP/2 is opt-in; requires the h2 extra for httpx.
HTTP2_ENABLED = _env_bool("ZAI_HTTP2_ENABLED", False)
73
+
74
+
75
class ZaiClient:
    """Async client for the chat.z.ai private web API (guest-session based).

    Wraps a shared ``httpx.AsyncClient`` and stores the guest credentials
    (``token`` / ``user_id`` / ``username``) captured by :meth:`auth_as_guest`.
    """

    def __init__(self) -> None:
        # Pool/timeout tuning comes from the ZAI_HTTP_* module constants.
        timeout = httpx.Timeout(
            timeout=HTTP_TIMEOUT_SECONDS,
            connect=HTTP_CONNECT_TIMEOUT_SECONDS,
        )
        limits = httpx.Limits(
            max_connections=HTTP_MAX_CONNECTIONS,
            max_keepalive_connections=HTTP_MAX_KEEPALIVE_CONNECTIONS,
            keepalive_expiry=HTTP_KEEPALIVE_EXPIRY_SECONDS,
        )
        self.client = httpx.AsyncClient(
            base_url=BASE_URL,
            timeout=timeout,
            limits=limits,
            http2=HTTP2_ENABLED,
            headers={
                # Browser-like defaults so requests match the web front end.
                "User-Agent": USER_AGENT,
                "Accept-Language": "zh-CN",
                "Referer": f"{BASE_URL}/",
                "Origin": BASE_URL,
            },
        )
        # Populated by auth_as_guest(); None until then.
        self.token: str | None = None
        self.user_id: str | None = None
        self.username: str | None = None

    async def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        await self.client.aclose()

    # ── auth ────────────────────────────────────────────────────────

    async def auth_as_guest(self) -> dict:
        """GET /api/v1/auths/ β€” creates a guest session and returns user info.

        Side effect: stores ``token``, ``user_id`` and ``username`` on self.
        Raises ``httpx.HTTPStatusError`` on a non-2xx response.
        """
        resp = await self.client.get(
            "/api/v1/auths/",
            headers={"Content-Type": "application/json"},
        )
        resp.raise_for_status()
        data = resp.json()
        self.token = data["token"]
        self.user_id = data["id"]
        # Prefer the display name; fall back to the email local-part.
        self.username = data.get("name") or data.get("email", "").split("@")[0]
        return data

    # ── models ──────────────────────────────────────────────────────

    async def get_models(self) -> list:
        """GET /api/models β€” returns available model list.

        The Authorization header is only sent once a token is held.
        NOTE(review): upstream may return a dict envelope rather than a
        bare list β€” callers normalize both shapes.
        """
        resp = await self.client.get(
            "/api/models",
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
                **({"Authorization": f"Bearer {self.token}"} if self.token else {}),
            },
        )
        resp.raise_for_status()
        return resp.json()

    # ── chat CRUD ───────────────────────────────────────────────────

    async def create_chat(
        self,
        user_message: str,
        model: str = DEFAULT_MODEL,
    ) -> dict:
        """POST /api/v1/chats/new β€” creates a new chat session.

        Builds the web UI's chat envelope with a single user message as the
        conversation root. Returns the server's chat object (contains "id").
        """
        msg_id = str(uuid.uuid4())
        ts = int(time.time())
        body = {
            "chat": {
                "id": "",
                # "ζ–°θŠε€©" = "new chat", the web UI's default title.
                "title": "ζ–°θŠε€©",
                "models": [model],
                "params": {},
                "history": {
                    "messages": {
                        msg_id: {
                            "id": msg_id,
                            "parentId": None,
                            "childrenIds": [],
                            "role": "user",
                            "content": user_message,
                            "timestamp": ts,
                            "models": [model],
                        }
                    },
                    "currentId": msg_id,
                },
                "tags": [],
                "flags": [],
                "features": [
                    {
                        "type": "tool_selector",
                        "server": "tool_selector_h",
                        "status": "hidden",
                    }
                ],
                "mcp_servers": [],
                "enable_thinking": True,
                "auto_web_search": False,
                "message_version": 1,
                "extra": {},
                # Outer timestamp is in milliseconds, unlike the per-message one.
                "timestamp": int(time.time() * 1000),
            }
        }
        resp = await self.client.post(
            "/api/v1/chats/new",
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
                **({"Authorization": f"Bearer {self.token}"} if self.token else {}),
            },
            json=body,
        )
        resp.raise_for_status()
        return resp.json()

    # ── signature ───────────────────────────────────────────────────

    @staticmethod
    def _generate_signature(
        sorted_payload: str, prompt: str, timestamp: str
    ) -> str:
        """
        Two-layer HMAC-SHA256 matching DLHfQWwv.js.

        1. b64_prompt = base64(utf8(prompt))
        2. message = "{sorted_payload}|{b64_prompt}|{timestamp}"
        3. time_bucket = floor(int(timestamp) / 300_000)
        4. derived_key = HMAC-SHA256(HMAC_SECRET, str(time_bucket)) β†’ hex string
        5. signature = HMAC-SHA256(derived_key_hex_bytes, message) β†’ hex
        """
        b64_prompt = base64.b64encode(prompt.encode("utf-8")).decode("ascii")
        message = f"{sorted_payload}|{b64_prompt}|{timestamp}"
        # Key rotates every 5 minutes (timestamp is in milliseconds).
        time_bucket = int(timestamp) // (5 * 60 * 1000)

        derived_key_hex = hmac.new(
            HMAC_SECRET.encode("utf-8"),
            str(time_bucket).encode("utf-8"),
            hashlib.sha256,
        ).hexdigest()

        # Second layer keys on the ASCII hex digest, not the raw digest bytes
        # β€” this matches the JS implementation.
        signature = hmac.new(
            derived_key_hex.encode("utf-8"),
            message.encode("utf-8"),
            hashlib.sha256,
        ).hexdigest()
        return signature

    def _build_query_and_signature(
        self, prompt: str, chat_id: str
    ) -> tuple[str, str]:
        """Build the full URL query string and X-Signature header.

        Returns (full_query_string, signature).
        """
        timestamp_ms = str(int(time.time() * 1000))
        request_id = str(uuid.uuid4())

        now = datetime.now(timezone.utc)

        # Core params (used for sortedPayload)
        core = {
            "timestamp": timestamp_ms,
            "requestId": request_id,
            "user_id": self.user_id,
        }

        # sortedPayload: Object.entries(core).sort(by key).join(",")
        sorted_payload = ",".join(
            f"{k},{v}" for k, v in sorted(core.items(), key=lambda x: x[0])
        )

        # Compute signature over the prompt
        signature = self._generate_signature(sorted_payload, prompt, timestamp_ms)

        # Browser/device fingerprint params β€” hard-coded to mimic a fixed
        # Chrome-on-Windows profile; presumably only presence/shape matters,
        # not the exact values (TODO confirm against the JS).
        extra = {
            "version": CLIENT_VERSION,
            "platform": "web",
            "token": self.token or "",
            "user_agent": USER_AGENT,
            "language": "zh-CN",
            "languages": "zh-CN",
            "timezone": "Asia/Shanghai",
            "cookie_enabled": "true",
            "screen_width": "1920",
            "screen_height": "1080",
            "screen_resolution": "1920x1080",
            "viewport_height": "919",
            "viewport_width": "944",
            "viewport_size": "944x919",
            "color_depth": "24",
            "pixel_ratio": "1.25",
            "current_url": f"{BASE_URL}/c/{chat_id}",
            "pathname": f"/c/{chat_id}",
            "search": "",
            "hash": "",
            "host": "chat.z.ai",
            "hostname": "chat.z.ai",
            "protocol": "https:",
            "referrer": "",
            "title": "Z.ai - Free AI Chatbot & Agent powered by GLM-5 & GLM-4.7",
            "timezone_offset": "-480",
            # JS-style ISO timestamp with millisecond precision and "Z" suffix.
            "local_time": now.strftime("%Y-%m-%dT%H:%M:%S.")
            + f"{now.microsecond // 1000:03d}Z",
            "utc_time": now.strftime("%a, %d %b %Y %H:%M:%S GMT"),
            "is_mobile": "false",
            "is_touch": "false",
            "max_touch_points": "10",
            "browser_name": "Chrome",
            "os_name": "Windows",
            "signature_timestamp": timestamp_ms,
        }

        all_params = {**core, **extra}
        query_string = urlencode(all_params)

        return query_string, signature

    # ── chat completions (SSE) ──────────────────────────────────────

    async def chat_completions(
        self,
        chat_id: str,
        messages: list[dict],
        prompt: str,
        *,
        model: str = DEFAULT_MODEL,
        parent_message_id: str | None = None,
        tools: list[dict] | None = None,
    ):
        """POST /api/v2/chat/completions β€” streams SSE response.

        Yields the full event ``data`` dict for each SSE frame.
        Raises ``RuntimeError`` with the response body on non-200 status.
        """
        query_string, signature = self._build_query_and_signature(prompt, chat_id)

        msg_id = str(uuid.uuid4())
        user_msg_id = str(uuid.uuid4())

        # Fixed UTC+8 offset to stay consistent with the "Asia/Shanghai"
        # timezone advertised in the fingerprint params.
        now = datetime.now(timezone(timedelta(hours=8)))
        variables = {
            "{{USER_NAME}}": self.username or "Guest",
            "{{USER_LOCATION}}": "Unknown",
            "{{CURRENT_DATETIME}}": now.strftime("%Y-%m-%d %H:%M:%S"),
            "{{CURRENT_DATE}}": now.strftime("%Y-%m-%d"),
            "{{CURRENT_TIME}}": now.strftime("%H:%M:%S"),
            "{{CURRENT_WEEKDAY}}": now.strftime("%A"),
            "{{CURRENT_TIMEZONE}}": "Asia/Shanghai",
            "{{USER_LANGUAGE}}": "zh-CN",
        }

        body = {
            "stream": True,
            "model": model,
            "messages": messages,
            # Must be the same prompt the X-Signature was computed over.
            "signature_prompt": prompt,
            "params": {},
            "extra": {},
            "features": {
                "image_generation": False,
                "web_search": False,
                "auto_web_search": False,
                "preview_mode": True,
                "flags": [],
                "enable_thinking": True,
            },
            "variables": variables,
            "chat_id": chat_id,
            "id": msg_id,
            "current_user_message_id": user_msg_id,
            "current_user_message_parent_id": parent_message_id,
            "background_tasks": {
                "title_generation": True,
                "tags_generation": True,
            },
        }
        if tools:
            body["tools"] = tools

        headers = {
            "Content-Type": "application/json",
            "Accept": "*/*",
            "Accept-Language": "zh-CN",
            "X-FE-Version": FE_VERSION,
            "X-Signature": signature,
            **({"Authorization": f"Bearer {self.token}"} if self.token else {}),
        }

        url = f"{BASE_URL}/api/v2/chat/completions?{query_string}"

        async with self.client.stream(
            "POST", url, headers=headers, json=body,
        ) as resp:
            if resp.status_code != 200:
                error_body = await resp.aread()
                raise RuntimeError(
                    f"chat/completions {resp.status_code}: {error_body.decode()}"
                )
            async for line in resp.aiter_lines():
                # SSE frames: only "data: ..." lines carry payloads.
                if not line.startswith("data: "):
                    continue
                raw = line[6:]
                if raw.strip() == "[DONE]":
                    return
                try:
                    event = json.loads(raw)
                except json.JSONDecodeError:
                    # Skip malformed/partial frames rather than aborting.
                    continue
                data = event.get("data", {})
                yield data
                if data.get("done"):
                    return
391
+
392
+
393
async def main() -> None:
    """Smoke-test driver: guest auth β†’ model list β†’ new chat β†’ streamed reply.

    Prints each step's result, streaming the "thinking" and "answer" phases
    to stdout. The shared HTTP client is always closed, even on failure.
    """
    client = ZaiClient()
    try:
        # 1. Authenticate as guest
        print("[1] Authenticating as guest...")
        auth = await client.auth_as_guest()
        print(f" user_id : {auth['id']}")
        print(f" email : {auth.get('email', 'N/A')}")
        print(f" token : {auth['token'][:40]}...")

        # 2. Fetch models β€” upstream may return a dict envelope or a bare list.
        print("\n[2] Fetching models...")
        models_resp = await client.get_models()
        if isinstance(models_resp, dict) and "data" in models_resp:
            names = [m.get("id", m.get("name", "?")) for m in models_resp["data"]]
        elif isinstance(models_resp, list):
            names = [m.get("id", m.get("name", "?")) for m in models_resp]
        else:
            names = [str(models_resp)[:80]]
        print(f" models : {', '.join(names[:10])}")

        # 3. Create chat
        user_message = "Hello"
        print(f"\n[3] Creating chat with first message: {user_message!r}")
        chat = await client.create_chat(user_message)
        chat_id = chat["id"]
        print(f" chat_id : {chat_id}")

        # 4. Stream chat completions
        # (fix: the messages list was previously built twice β€” once, unused,
        # before create_chat; it is only needed here.)
        print(f"\n[4] Streaming chat completions (model={DEFAULT_MODEL})...\n")
        messages = [{"role": "user", "content": user_message}]

        thinking_started = False
        answer_started = False
        async for data in client.chat_completions(
            chat_id=chat_id,
            messages=messages,
            prompt=user_message,
        ):
            phase = data.get("phase", "")
            delta = data.get("delta_content", "")
            if phase == "thinking":
                if not thinking_started:
                    print("[thinking] ", end="", flush=True)
                    thinking_started = True
                print(delta, end="", flush=True)
            elif phase == "answer":
                if not answer_started:
                    if thinking_started:
                        print("\n")
                    print("[answer] ", end="", flush=True)
                    answer_started = True
                print(delta, end="", flush=True)
            elif phase == "done":
                break
        print("\n\n[done]")

    finally:
        await client.close()
453
+
454
+
455
# Run the demo flow when executed directly (e.g. `python main.py`).
if __name__ == "__main__":
    asyncio.run(main())
openai.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenAI-compatible proxy server for chat.z.ai."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import os
8
+ import time
9
+ import uuid
10
+ from contextlib import asynccontextmanager
11
+
12
+ import uvicorn
13
+ from fastapi import FastAPI, Request
14
+ from fastapi.responses import JSONResponse, StreamingResponse
15
+
16
+ from main import ZaiClient
17
+
18
+ # ── Session Pool ─────────────────────────────────────────────────────
19
+
20
+
21
+ def _env_float(name: str, default: float) -> float:
22
+ raw = os.getenv(name)
23
+ if raw is None:
24
+ return default
25
+ try:
26
+ return float(raw)
27
+ except ValueError:
28
+ return default
29
+
30
+
31
# Minimum seconds between guest-token refreshes (throttles refresh_auth's
# single-flight path); override via ZAI_AUTH_REFRESH_MIN_INTERVAL_SECONDS.
AUTH_REFRESH_MIN_INTERVAL_SECONDS = _env_float(
    "ZAI_AUTH_REFRESH_MIN_INTERVAL_SECONDS", 2.0
)
34
+
35
+
36
class SessionPool:
    """Manages a single ZaiClient instance with automatic auth refresh.

    All requests share one client/guest session; an asyncio.Lock makes
    authentication and refresh single-flight across concurrent requests.
    """

    def __init__(self) -> None:
        self._client = ZaiClient()
        self._lock = asyncio.Lock()
        # True once a guest token has been obtained.
        self._authed = False
        # Monotonic timestamp of the last successful auth/refresh.
        self._last_auth_refresh_at = 0.0
        self._refresh_min_interval = max(0.0, AUTH_REFRESH_MIN_INTERVAL_SECONDS)

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.close()

    async def ensure_auth(self) -> None:
        """Authenticate if not already done."""
        # Fast path: already authenticated, no lock needed.
        if self._authed:
            return
        async with self._lock:
            # Double-check under the lock β€” another task may have won the race.
            if self._authed:
                return
            await self._client.auth_as_guest()
            self._authed = True
            self._last_auth_refresh_at = time.monotonic()

    async def refresh_auth(self, *, force: bool = False) -> None:
        """Refresh the guest token with single-flight behavior.

        Refreshes are throttled to at most one per _refresh_min_interval
        unless *force* is set.
        """
        now = time.monotonic()
        # Unlocked pre-check to avoid lock contention on recent refreshes.
        if (
            not force
            and self._authed
            and now - self._last_auth_refresh_at < self._refresh_min_interval
        ):
            return
        async with self._lock:
            # Re-check under the lock: a concurrent task may have refreshed.
            now = time.monotonic()
            if (
                not force
                and self._authed
                and now - self._last_auth_refresh_at < self._refresh_min_interval
            ):
                return
            await self._client.auth_as_guest()
            self._authed = True
            self._last_auth_refresh_at = time.monotonic()

    async def get_models(self) -> list | dict:
        """Authenticated passthrough to ZaiClient.get_models()."""
        await self.ensure_auth()
        return await self._client.get_models()

    async def create_chat(self, user_message: str, model: str) -> dict:
        """Authenticated passthrough to ZaiClient.create_chat()."""
        await self.ensure_auth()
        return await self._client.create_chat(user_message, model)

    def chat_completions(
        self,
        chat_id: str,
        messages: list[dict],
        prompt: str,
        *,
        model: str,
        tools: list[dict] | None = None,
    ):
        # Returns the async generator directly; no auth check here because
        # callers reach this only after create_chat() has ensured auth.
        return self._client.chat_completions(
            chat_id=chat_id,
            messages=messages,
            prompt=prompt,
            model=model,
            tools=tools,
        )
105
+
106
+
107
+ pool = SessionPool()
108
+
109
+ # ── FastAPI app ──────────────────────────────────────────────────────
110
+
111
+
112
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """FastAPI lifespan: warm the guest session on startup, close the
    shared client on shutdown."""
    await pool.ensure_auth()
    yield
    await pool.close()
117
+
118
+
119
+ app = FastAPI(lifespan=lifespan)
120
+
121
+ # ── Helpers ──────────────────────────────────────────────────────────
122
+
123
+
124
+ def _make_id() -> str:
125
+ return f"chatcmpl-{uuid.uuid4().hex[:29]}"
126
+
127
+
128
+ def _openai_chunk(
129
+ completion_id: str,
130
+ model: str,
131
+ *,
132
+ content: str | None = None,
133
+ reasoning_content: str | None = None,
134
+ finish_reason: str | None = None,
135
+ ) -> dict:
136
+ delta: dict = {}
137
+ if content is not None:
138
+ delta["content"] = content
139
+ if reasoning_content is not None:
140
+ delta["reasoning_content"] = reasoning_content
141
+ return {
142
+ "id": completion_id,
143
+ "object": "chat.completion.chunk",
144
+ "created": int(time.time()),
145
+ "model": model,
146
+ "choices": [
147
+ {
148
+ "index": 0,
149
+ "delta": delta,
150
+ "finish_reason": finish_reason,
151
+ }
152
+ ],
153
+ }
154
+
155
+
156
+ def _openai_completion(
157
+ completion_id: str,
158
+ model: str,
159
+ content: str,
160
+ reasoning_content: str,
161
+ ) -> dict:
162
+ message: dict = {"role": "assistant", "content": content}
163
+ if reasoning_content:
164
+ message["reasoning_content"] = reasoning_content
165
+ return {
166
+ "id": completion_id,
167
+ "object": "chat.completion",
168
+ "created": int(time.time()),
169
+ "model": model,
170
+ "choices": [
171
+ {
172
+ "index": 0,
173
+ "message": message,
174
+ "finish_reason": "stop",
175
+ }
176
+ ],
177
+ "usage": {
178
+ "prompt_tokens": 0,
179
+ "completion_tokens": 0,
180
+ "total_tokens": 0,
181
+ },
182
+ }
183
+
184
+
185
+ # ── /v1/models ───────────────────────────────────────────────────────
186
+
187
+
188
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing backed by the upstream /api/models.

    Upstream may return either a {"data": [...]} envelope or a bare list;
    anything else yields an empty model list.
    """
    models_resp = await pool.get_models()
    # Normalize to list
    if isinstance(models_resp, dict) and "data" in models_resp:
        models_list = models_resp["data"]
    elif isinstance(models_resp, list):
        models_list = models_resp
    else:
        models_list = []

    data = []
    for m in models_list:
        # Prefer "id"; fall back to "name", then "unknown".
        mid = m.get("id") or m.get("name", "unknown")
        data.append(
            {
                "id": mid,
                "object": "model",
                # Upstream exposes no creation time; 0 is a placeholder.
                "created": 0,
                "owned_by": "z.ai",
            }
        )
    return {"object": "list", "data": data}
211
+
212
+
213
+ # ── /v1/chat/completions ────────────────────────────────────────────
214
+
215
+
216
async def _do_request(
    messages: list[dict],
    model: str,
    prompt: str,
    tools: list[dict] | None = None,
):
    """Create a new chat and return (chat_id, async generator).

    A fresh upstream chat is created per request (the proxy is stateless);
    *prompt* seeds both the chat and the request signature.
    Raises on Zai errors so the caller can retry.
    """
    chat = await pool.create_chat(prompt, model)
    chat_id = chat["id"]
    gen = pool.chat_completions(
        chat_id=chat_id,
        messages=messages,
        prompt=prompt,
        model=model,
        tools=tools,
    )
    return chat_id, gen
236
+
237
+
238
async def _stream_response(
    messages: list[dict],
    model: str,
    prompt: str,
    tools: list[dict] | None = None,
):
    """SSE generator with one retry on error.

    Translates Zai stream phases into OpenAI chunk frames:
    role chunk β†’ tool_call / reasoning / content deltas β†’ finish chunk β†’
    "[DONE]". On any exception it refreshes auth and retries once.

    NOTE(review): a failure after chunks have been yielded restarts the
    whole stream, so the client may see a duplicate role chunk / repeated
    deltas β€” confirm whether downstream clients tolerate this.
    """
    completion_id = _make_id()
    retried = False

    while True:
        try:
            _chat_id, gen = await _do_request(messages, model, prompt, tools)

            # Send initial role chunk
            role_chunk = {
                "id": completion_id,
                "object": "chat.completion.chunk",
                "created": int(time.time()),
                "model": model,
                "choices": [
                    {
                        "index": 0,
                        "delta": {"role": "assistant"},
                        "finish_reason": None,
                    }
                ],
            }
            yield f"data: {json.dumps(role_chunk, ensure_ascii=False)}\n\n"

            tool_call_idx = 0
            async for data in gen:
                phase = data.get("phase", "")
                delta = data.get("delta_content", "")

                # Tool call events from Zai
                if data.get("tool_calls"):
                    for tc in data["tool_calls"]:
                        tc_chunk = {
                            "id": completion_id,
                            "object": "chat.completion.chunk",
                            "created": int(time.time()),
                            "model": model,
                            "choices": [
                                {
                                    "index": 0,
                                    "delta": {
                                        "tool_calls": [
                                            {
                                                "index": tool_call_idx,
                                                # Synthesize an id when upstream omits one.
                                                "id": tc.get("id", f"call_{uuid.uuid4().hex[:24]}"),
                                                "type": "function",
                                                "function": {
                                                    "name": tc.get("function", {}).get("name", ""),
                                                    "arguments": tc.get("function", {}).get("arguments", ""),
                                                },
                                            }
                                        ]
                                    },
                                    "finish_reason": None,
                                }
                            ],
                        }
                        yield f"data: {json.dumps(tc_chunk, ensure_ascii=False)}\n\n"
                        tool_call_idx += 1
                elif phase == "thinking" and delta:
                    chunk = _openai_chunk(
                        completion_id, model, reasoning_content=delta
                    )
                    yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
                elif phase == "answer" and delta:
                    chunk = _openai_chunk(completion_id, model, content=delta)
                    yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
                elif phase == "done":
                    break

            # Send finish chunk
            finish_reason = "tool_calls" if tool_call_idx > 0 else "stop"
            finish_chunk = _openai_chunk(
                completion_id, model, finish_reason=finish_reason
            )
            yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
            yield "data: [DONE]\n\n"
            return

        except Exception:
            if retried:
                # Already retried once β€” yield error and stop
                error = {
                    "error": {
                        "message": "Upstream Zai error after retry",
                        "type": "server_error",
                    }
                }
                yield f"data: {json.dumps(error)}\n\n"
                yield "data: [DONE]\n\n"
                return
            retried = True
            await pool.refresh_auth()
            # Loop back and retry
338
+
339
+
340
async def _sync_response(
    messages: list[dict],
    model: str,
    prompt: str,
    tools: list[dict] | None = None,
) -> dict:
    """Non-streaming response with one retry on error.

    Drains the upstream SSE stream, accumulating reasoning/content deltas
    and tool calls, then returns either a tool_calls completion or a plain
    text completion. On the first failure, auth is refreshed and the whole
    request is retried once; a second failure returns an {"error": ...} dict
    (the route handler maps it to HTTP 502).
    """
    completion_id = _make_id()

    for attempt in range(2):
        try:
            _chat_id, gen = await _do_request(messages, model, prompt, tools)

            content_parts: list[str] = []
            reasoning_parts: list[str] = []
            tool_calls: list[dict] = []

            async for data in gen:
                phase = data.get("phase", "")
                delta = data.get("delta_content", "")

                if data.get("tool_calls"):
                    for tc in data["tool_calls"]:
                        tool_calls.append(
                            {
                                # Synthesize an id when upstream omits one.
                                "id": tc.get("id", f"call_{uuid.uuid4().hex[:24]}"),
                                "type": "function",
                                "function": {
                                    "name": tc.get("function", {}).get("name", ""),
                                    "arguments": tc.get("function", {}).get("arguments", ""),
                                },
                            }
                        )
                elif phase == "thinking" and delta:
                    reasoning_parts.append(delta)
                elif phase == "answer" and delta:
                    content_parts.append(delta)
                elif phase == "done":
                    break

            if tool_calls:
                # OpenAI convention: content is null when tool calls are returned.
                message: dict = {"role": "assistant", "content": None, "tool_calls": tool_calls}
                if reasoning_parts:
                    message["reasoning_content"] = "".join(reasoning_parts)
                return {
                    "id": completion_id,
                    "object": "chat.completion",
                    "created": int(time.time()),
                    "model": model,
                    "choices": [
                        {
                            "index": 0,
                            "message": message,
                            "finish_reason": "tool_calls",
                        }
                    ],
                    "usage": {
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                        "total_tokens": 0,
                    },
                }

            return _openai_completion(
                completion_id,
                model,
                "".join(content_parts),
                "".join(reasoning_parts),
            )

        except Exception:
            if attempt == 0:
                await pool.refresh_auth()
                continue
            return {
                "error": {
                    "message": "Upstream Zai error after retry",
                    "type": "server_error",
                }
            }

    # Unreachable, but satisfy type checker
    return {"error": {"message": "Unexpected error", "type": "server_error"}}
423
+
424
+
425
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible chat completions endpoint.

    Extracts the last user message as the signature prompt, flattens the
    whole conversation into one <ROLE>-tagged user message (upstream ignores
    multi-turn context), then dispatches to the streaming or non-streaming
    handler. Returns 400 when no user message is present, 502 on upstream
    failure (non-streaming path).
    """
    body = await request.json()

    model: str = body.get("model", "glm-5")
    messages: list[dict] = body.get("messages", [])
    stream: bool = body.get("stream", False)
    tools: list[dict] | None = body.get("tools")

    def _text_of(content) -> str:
        """Normalize a message's content to plain text.

        Multimodal content arrays are reduced to their "text" parts;
        None becomes "".
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return " ".join(
                p.get("text", "")
                for p in content
                if isinstance(p, dict) and p.get("type") == "text"
            )
        return "" if content is None else str(content)

    # Extract the last user message as the prompt for signature
    prompt = ""
    for msg in reversed(messages):
        if msg.get("role") == "user":
            prompt = _text_of(msg.get("content", ""))
            break

    # Zai ignores multi-turn context β€” flatten all messages into a single
    # user message with <ROLE> tags so the model sees the full conversation.
    # (fix: multimodal list content was previously interpolated as its raw
    # Python repr; it is now reduced to its text parts first.)
    parts: list[str] = []
    for msg in messages:
        role = msg.get("role", "user")
        content = _text_of(msg.get("content", ""))
        parts.append(f"<{role.upper()}>{content}</{role.upper()}>")
    flat_content = "\n".join(parts)
    messages = [{"role": "user", "content": flat_content}]

    if not prompt:
        return JSONResponse(
            status_code=400,
            content={
                "error": {
                    "message": "No user message found in messages",
                    "type": "invalid_request_error",
                }
            },
        )

    if stream:
        return StreamingResponse(
            _stream_response(messages, model, prompt, tools),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                # Disable proxy buffering so SSE chunks flush immediately.
                "X-Accel-Buffering": "no",
            },
        )
    else:
        result = await _sync_response(messages, model, prompt, tools)
        if "error" in result:
            return JSONResponse(status_code=502, content=result)
        return result
486
+
487
+
488
+ # ── Entry point ──────────────────────────────────────────────────────
489
+
490
# Local dev entry point; in Docker the app is served via `uvicorn openai:app`.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)