hins111 committed on
Commit
e536852
·
verified ·
1 Parent(s): e6f2697

Rename adapter.ts to main.py

Browse files
Files changed (2) hide show
  1. adapter.ts +0 -255
  2. main.py +260 -0
adapter.ts DELETED
@@ -1,255 +0,0 @@
1
- // deno run --allow-net --allow-env adapter.ts
2
-
3
- import { serve } from "https://deno.land/std@0.203.0/http/server.ts";
4
-
5
- // --- Configuration from Environment Variables (Safer for deployment) ---
6
-
7
- function getKeysFromEnv(envVarName: string): Set<string> {
8
- const keysString = Deno.env.get(envVarName);
9
- if (!keysString) {
10
- console.warn(`Environment variable ${envVarName} is not set.`);
11
- return new Set();
12
- }
13
- // Split by comma and trim whitespace, filter out empty strings
14
- return new Set(keysString.split(',').map(k => k.trim()).filter(Boolean));
15
- }
16
-
17
- // Client keys will be read from Hugging Face Secrets
18
- const CLIENT_API_KEYS = getKeysFromEnv("CLIENT_KEYS");
19
-
20
- // CodeGeeX tokens will also be read from Hugging Face Secrets
21
- const codegeeXTokensRaw = Array.from(getKeysFromEnv("CODEGEEX_KEYS"));
22
-
23
- const CODEGEEX_TOKENS: {
24
- token: string;
25
- isValid: boolean;
26
- lastUsed: number;
27
- errorCount: number;
28
- }[] = codegeeXTokensRaw.map(token => ({
29
- token: token,
30
- isValid: true,
31
- lastUsed: 0,
32
- errorCount: 0
33
- }));
34
-
35
-
36
- const MAX_ERROR_COUNT = 3;
37
- const ERROR_COOLDOWN = 300 * 1000; // ms
38
-
39
- // --- Utilities ---
40
- function now(): number {
41
- return Date.now();
42
- }
43
-
44
- function rotateToken(): typeof CODEGEEX_TOKENS[0] | null {
45
- if (CODEGEEX_TOKENS.length === 0) {
46
- console.error("CODEGEEX_TOKENS array is empty. Check your CODEGEEX_KEYS secret.");
47
- return null;
48
- }
49
-
50
- const available = CODEGEEX_TOKENS.filter(t => {
51
- if (!t.isValid) return false;
52
- if (t.errorCount >= MAX_ERROR_COUNT && now() - t.lastUsed < ERROR_COOLDOWN) return false;
53
- return true;
54
- });
55
- if (available.length === 0) return null;
56
-
57
- // reset cooled-down tokens
58
- for (const t of available) {
59
- if (t.errorCount >= MAX_ERROR_COUNT && now() - t.lastUsed >= ERROR_COOLDOWN) {
60
- t.errorCount = 0;
61
- }
62
- }
63
-
64
- // pick the one least recently used, then lowest errorCount
65
- available.sort((a, b) => a.lastUsed - b.lastUsed || a.errorCount - b.errorCount);
66
- const tok = available[0];
67
- tok.lastUsed = now();
68
- return tok;
69
- }
70
-
71
- // This function translates the OpenAI format to CodeGeeX format
72
- function convertToCodeGeeXPayload(params: { model: string; messages: any[] }) {
73
- // CodeGeeX seems to use the last message's content as the main prompt.
74
- // The history part is more complex, here we simplify it.
75
- const lastMessage = params.messages.slice(-1)[0];
76
- const history = params.messages.slice(0, -1)
77
- .filter(msg => msg.role === 'user' || msg.role === 'assistant')
78
- .map(msg => ({
79
- role: msg.role,
80
- content: msg.content
81
- }));
82
-
83
- return {
84
- user_role: 0, // This seems to be a fixed value
85
- ide: "HuggingFace", // Let's identify the source
86
- prompt: lastMessage?.content || "",
87
- history: history, // Passing a simplified history
88
- model: params.model,
89
- };
90
- }
91
-
92
-
93
- async function proxyChat(req: Request, params: { stream: boolean; model: string; messages: any[] }) {
94
- const tokenObj = rotateToken();
95
- if (!tokenObj) {
96
- return new Response(JSON.stringify({ error: { message: "No valid CodeGeeX tokens available", type: "server_error" } }), { status: 503, headers: { "Content-Type": "application/json" }});
97
- }
98
-
99
- const payload = convertToCodeGeeXPayload(params);
100
-
101
- try {
102
- const response = await fetch("https://codegeex.cn/prod/code/chatCodeSseV3/chat", {
103
- method: "POST",
104
- headers: {
105
- "Content-Type": "application/json",
106
- "Accept": "text/event-stream",
107
- "code-token": tokenObj.token,
108
- },
109
- body: JSON.stringify(payload),
110
- });
111
-
112
- if (!response.ok) {
113
- console.error(`Upstream error from CodeGeeX: ${response.status}`);
114
- if (response.status === 401 || response.status === 403) {
115
- tokenObj.isValid = false;
116
- console.warn(`Token ${tokenObj.token.substring(0, 15)}... marked as invalid due to 401/403 error.`);
117
- } else {
118
- tokenObj.errorCount++;
119
- console.warn(`Token ${tokenObj.token.substring(0, 15)}... error count increased to ${tokenObj.errorCount}.`);
120
- }
121
- const errorBody = await response.text();
122
- return new Response(JSON.stringify({ error: { message: `Upstream error ${response.status}: ${errorBody}`, type: "upstream_error" } }), { status: 502, headers: { "Content-Type": "application/json" }});
123
- }
124
-
125
- // For stream, we must transform the raw CodeGeeX SSE to OpenAI format
126
- if (params.stream) {
127
- const { readable, writable } = new TransformStream();
128
- const writer = writable.getWriter();
129
- const encoder = new TextEncoder();
130
-
131
- // This function processes the stream from CodeGeeX and sends OpenAI compatible chunks
132
- (async () => {
133
- const reader = response.body?.getReader();
134
- if (!reader) {
135
- await writer.close();
136
- return;
137
- }
138
- const decoder = new TextDecoder();
139
- const completionId = `chatcmpl-${crypto.randomUUID()}`;
140
- const creationTime = Math.floor(now() / 1000);
141
-
142
- try {
143
- while(true) {
144
- const { done, value } = await reader.read();
145
- if (done) break;
146
-
147
- const chunkText = decoder.decode(value);
148
- // A simple transformation: assume the raw chunk is the content delta
149
- const openAIChunk = {
150
- id: completionId,
151
- object: "chat.completion.chunk",
152
- created: creationTime,
153
- model: params.model,
154
- choices: [{ delta: { content: chunkText }, index: 0, finish_reason: null }]
155
- };
156
- await writer.write(encoder.encode(`data: ${JSON.stringify(openAIChunk)}\n\n`));
157
- }
158
- // Send the final DONE chunk
159
- await writer.write(encoder.encode(`data: [DONE]\n\n`));
160
- } catch (e) {
161
- console.error("Error while transforming stream:", e);
162
- } finally {
163
- await writer.close();
164
- }
165
- })();
166
-
167
- return new Response(readable, {
168
- status: 200,
169
- headers: { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", "Connection": "keep-alive" },
170
- });
171
- } else {
172
- // accumulate and return JSON
173
- const text = await response.text();
174
- return new Response(JSON.stringify({
175
- id: `chatcmpl-${crypto.randomUUID()}`,
176
- object: "chat.completion",
177
- created: Math.floor(now() / 1000),
178
- model: params.model,
179
- choices: [{ message: { role: "assistant", content: text }, index: 0, finish_reason: "stop" }],
180
- usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 } // Placeholder usage
181
- }), {
182
- status: 200,
183
- headers: { "Content-Type": "application/json" },
184
- });
185
- }
186
- } catch (err) {
187
- tokenObj.errorCount++;
188
- console.error("Fetch to CodeGeeX failed:", err);
189
- return new Response(JSON.stringify({ error: { message: err.message, type: "server_error" } }), { status: 500, headers: { "Content-Type": "application/json" }});
190
- }
191
- }
192
-
193
// --- Main Handler ---
// Routes all incoming HTTP traffic: CORS preflight, bearer-key auth,
// /v1/models listing, and /v1/chat/completions proxying (in that order).
async function handler(req: Request): Promise<Response> {
  const url = new URL(req.url);
  console.log(`Received request: ${req.method} ${url.pathname}`);

  // CORS preflight request handler for web clients.
  // NOTE(review): actual GET/POST responses below do not carry CORS
  // headers — browsers may still block them; confirm intended clients.
  if (req.method === 'OPTIONS') {
    return new Response(null, {
      status: 204,
      headers: {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
        'Access-Control-Allow-Headers': 'Content-Type, Authorization',
      },
    });
  }

  // Authentication middleware: strip the "Bearer " prefix, then check
  // the remainder against the configured client key set.
  const auth = req.headers.get("Authorization")?.replace(/^Bearer\s+/, "");
  if (CLIENT_API_KEYS.size === 0) {
    // An empty key set is a deployment error, not a client error -> 503.
    console.error("Server misconfigured: CLIENT_KEYS secret is not set or empty.");
    return new Response(JSON.stringify({ error: { message: "Server misconfigured: no client keys", type: "server_error" }}), { status: 503, headers: { "Content-Type": "application/json" }});
  }
  if (!auth || !CLIENT_API_KEYS.has(auth)) {
    return new Response(JSON.stringify({ error: { message: "Invalid or missing API key", type: "auth_error" }}), {
      status: 401,
      headers: { "WWW-Authenticate": "Bearer", "Content-Type": "application/json" },
    });
  }

  // GET /v1/models — static catalogue; `created` is stamped per request.
  if (url.pathname === "/v1/models" && req.method === "GET") {
    const modelData = [
      { id: "codegeex-4", object: "model", created: Math.floor(now() / 1000), owned_by: "codegeex" },
      { id: "codegeex-pro", object: "model", created: Math.floor(now() / 1000), owned_by: "codegeex" }
    ];
    return new Response(JSON.stringify({ object: "list", data: modelData }), {
      headers: { "Content-Type": "application/json" },
    });
  }

  // POST /v1/chat/completions — validate then delegate to proxyChat.
  if (url.pathname === "/v1/chat/completions" && req.method === "POST") {
    try {
      const body = await req.json();
      // Streaming defaults to true when the client omits the flag.
      const { model, messages, stream = true } = body;
      if (!model || !Array.isArray(messages) || messages.length === 0) {
        return new Response(JSON.stringify({ error: { message: "Bad Request: 'model' and 'messages' are required.", type: "invalid_request_error" } }), { status: 400, headers: { "Content-Type": "application/json" }});
      }
      return proxyChat(req, { model, messages, stream });
    } catch (e) {
      return new Response(JSON.stringify({ error: { message: "Invalid JSON body.", type: "invalid_request_error" } }), { status: 400, headers: { "Content-Type": "application/json" }});
    }
  }

  // Not found — any other path/method combination.
  return new Response(JSON.stringify({ error: "Not Found" }), { status: 404, headers: { "Content-Type": "application/json" }});
}
251
-
252
// --- Start Server ---
// Bind the routing handler above on the standard Hugging Face Spaces port.
const PORT = 7860; // Use the standard port for Hugging Face Spaces
console.log(`Starting Deno CodeGeeX Adapter on http://0.0.0.0:${PORT}`);
serve(handler, { port: PORT });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ===================================================================
# main.py (adapted to read configuration from Hugging Face Secrets)
# ===================================================================

import json
import os
import time
import uuid
import threading
from typing import Any, Dict, List, Optional, TypedDict, Union

import requests
from fastapi import FastAPI, HTTPException, Depends, Query
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, Field

# --- Type definitions and global state (unchanged from the original file) ---
class CodeGeeXToken(TypedDict):
    # Upstream CodeGeeX API token plus its rotation bookkeeping.
    token: str
    is_valid: bool    # set False permanently when upstream returns 401/403
    last_used: float  # epoch seconds at the moment rotation last picked it
    error_count: int  # consecutive soft failures; reset after the cooldown

VALID_CLIENT_KEYS: set = set()  # client-facing API keys, loaded at startup
CODEGEEX_TOKENS: List[CodeGeeXToken] = []  # upstream tokens, loaded at startup
CODEGEEX_MODELS: List[str] = ["claude-3-7-sonnet", "claude-sonnet-4"]
token_rotation_lock = threading.Lock()  # guards token selection and bookkeeping
MAX_ERROR_COUNT = 3  # soft failures before a token is benched
ERROR_COOLDOWN = 300  # seconds a benched token must sit out
DEBUG_MODE = os.environ.get("DEBUG_MODE", "false").lower() == "true"
32
+
33
# --- Pydantic models (unchanged from the original file) ---
class ChatMessage(BaseModel):
    # One OpenAI-style chat turn; content may be a string or a content-part list.
    role: str
    content: Union[str, List[Dict[str, Any]]]
    reasoning_content: Optional[str] = None

class ChatCompletionRequest(BaseModel):
    # Body of POST /v1/chat/completions (OpenAI-compatible subset).
    model: str
    messages: List[ChatMessage]
    stream: bool = True
    # NOTE(review): temperature/max_tokens/top_p are accepted but never
    # forwarded to the upstream payload — confirm whether that is intended.
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    top_p: Optional[float] = None

class ModelInfo(BaseModel):
    # One entry in the OpenAI-style /v1/models listing.
    id: str
    object: str = "model"
    created: int
    owned_by: str

class ModelList(BaseModel):
    object: str = "list"
    data: List[ModelInfo]
# (the remaining Pydantic models also match the original file)
class ChatCompletionChoice(BaseModel):
    message: ChatMessage
    index: int = 0
    finish_reason: str = "stop"
class ChatCompletionResponse(BaseModel):
    # Non-streaming chat.completion envelope; usage numbers are placeholders (all zero).
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex}")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionChoice]
    usage: Dict[str, int] = Field(default_factory=lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
class StreamChoice(BaseModel):
    delta: Dict[str, Any] = Field(default_factory=dict)
    index: int = 0
    finish_reason: Optional[str] = None
class StreamResponse(BaseModel):
    # One streaming chat.completion.chunk event.
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex}")
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[StreamChoice]
78
+
79
# --- FastAPI App ---
app = FastAPI(title="CodeGeeX OpenAI API Adapter")
# auto_error=False lets requests without credentials reach
# authenticate_client, which raises its own HTTP errors.
security = HTTPBearer(auto_error=False)

def log_debug(message: str):
    # Print-based debug logging, gated by the DEBUG_MODE env flag.
    if DEBUG_MODE:
        print(f"[DEBUG] {message}")
86
+
87
# --- Configuration loaders (modified to read from Hugging Face Secrets) ---
def load_client_api_keys_from_secrets():
    """Populate VALID_CLIENT_KEYS from the CLIENT_API_KEYS env secret (a JSON list)."""
    global VALID_CLIENT_KEYS
    try:
        if not (keys_str := os.environ.get("CLIENT_API_KEYS")):
            raise ValueError("Secret 'CLIENT_API_KEYS' not found.")
        parsed = json.loads(keys_str)
        # Anything other than a JSON list yields an empty key set.
        VALID_CLIENT_KEYS = set(parsed) if isinstance(parsed, list) else set()
        print(f"Successfully loaded {len(VALID_CLIENT_KEYS)} client API keys from secrets.")
    except Exception as e:
        print(f"FATAL: Error loading client API keys from secrets: {e}")
        VALID_CLIENT_KEYS = set()
101
+
102
def load_codegeex_tokens_from_secrets():
    """Populate CODEGEEX_TOKENS from the CODEGEEX_TOKENS env secret (JSON list of strings)."""
    global CODEGEEX_TOKENS
    CODEGEEX_TOKENS = []
    try:
        if not (tokens_str := os.environ.get("CODEGEEX_TOKENS")):
            raise ValueError("Secret 'CODEGEEX_TOKENS' not found.")
        parsed = json.loads(tokens_str)  # the secret is expected to be a JSON array
        if not isinstance(parsed, list):
            raise TypeError("Secret 'CODEGEEX_TOKENS' must be a JSON list of strings.")
        # Wrap each non-empty string token with fresh rotation bookkeeping.
        CODEGEEX_TOKENS = [
            {"token": value, "is_valid": True, "last_used": 0, "error_count": 0}
            for value in parsed
            if isinstance(value, str) and value
        ]
        print(f"Successfully loaded {len(CODEGEEX_TOKENS)} CodeGeeX tokens from secrets.")
    except Exception as e:
        print(f"FATAL: Error loading CodeGeeX tokens from secrets: {e}")
122
+
123
# --- Core logic (kept consistent with the original file) ---
def get_best_codegeex_token() -> Optional[CodeGeeXToken]:
    """Pick the least-recently-used usable token under the rotation lock.

    A token is usable when it is valid and either below the error threshold
    or past its cooldown since last use. Returns None when nothing is usable.
    """
    with token_rotation_lock:
        current = time.time()
        usable = []
        for entry in CODEGEEX_TOKENS:
            if not entry["is_valid"]:
                continue
            if entry["error_count"] >= MAX_ERROR_COUNT and current - entry["last_used"] <= ERROR_COOLDOWN:
                continue
            usable.append(entry)
        if not usable:
            return None
        # A token whose cooldown has fully elapsed gets a clean error slate.
        for entry in usable:
            if entry["error_count"] >= MAX_ERROR_COUNT and current - entry["last_used"] > ERROR_COOLDOWN:
                entry["error_count"] = 0
        # LRU first; ties broken by fewest recorded errors.
        usable.sort(key=lambda e: (e["last_used"], e["error_count"]))
        best = usable[0]
        best["last_used"] = current
        return best
135
+
136
def _convert_messages_to_codegeex_format(messages: List[ChatMessage]):
    """Split an OpenAI message list into a CodeGeeX (prompt, history) pair.

    The last user message becomes the prompt; earlier user/assistant turns
    are paired into CodeGeeX history entries. A trailing unanswered user
    turn is folded into the prompt. Non-string content collapses to "".

    Raises:
        HTTPException(400) when the list contains no user message.
    """
    if not messages:
        return "", []
    last_user_msg = next((msg for msg in reversed(messages) if msg.role == "user"), None)
    if not last_user_msg:
        raise HTTPException(status_code=400, detail="No user message found.")
    prompt = last_user_msg.content if isinstance(last_user_msg.content, str) else ""
    history, user_content, assistant_content = [], "", ""
    for msg in messages:
        # BUG FIX: compare by identity, not equality — pydantic `==` compares
        # field values, so an earlier duplicate of the last user message would
        # truncate the history prematurely.
        if msg is last_user_msg:
            break
        if msg.role == "user":
            # Flush a completed user/assistant pair before starting a new turn.
            if user_content and assistant_content:
                history.append({"query": user_content, "answer": assistant_content, "id": f"{uuid.uuid4()}"})
                user_content, assistant_content = "", ""
            user_content = msg.content if isinstance(msg.content, str) else ""
        elif msg.role == "assistant":
            assistant_content = msg.content if isinstance(msg.content, str) else ""
            if user_content:
                history.append({"query": user_content, "answer": assistant_content, "id": f"{uuid.uuid4()}"})
                user_content, assistant_content = "", ""
    # An unanswered user turn just before the final message is prepended to the prompt.
    if user_content and not assistant_content:
        prompt = user_content + "\n" + prompt
    return prompt, history
152
+
153
async def authenticate_client(auth: Optional[HTTPAuthorizationCredentials] = Depends(security)):
    """FastAPI dependency: reject requests without a configured, matching API key."""
    if not VALID_CLIENT_KEYS:
        raise HTTPException(status_code=503, detail="Service unavailable: Client API keys not configured.")
    if not auth or not auth.credentials:
        raise HTTPException(status_code=401, detail="API key required.", headers={"WWW-Authenticate": "Bearer"})
    if auth.credentials not in VALID_CLIENT_KEYS:
        raise HTTPException(status_code=403, detail="Invalid client API key.")
157
+
158
@app.on_event("startup")
async def startup():
    # Load all secret-backed configuration once at process start.
    print("Starting CodeGeeX OpenAI API Adapter server...")
    load_client_api_keys_from_secrets()
    load_codegeex_tokens_from_secrets()
    print("Server initialization completed.")
164
+
165
def get_models_list_response() -> ModelList:
    # Advertise the static model catalogue; `created` is stamped per call.
    return ModelList(data=[ModelInfo(id=model, created=int(time.time()), owned_by="anthropic") for model in CODEGEEX_MODELS])

@app.get("/v1/models", response_model=ModelList)
async def list_v1_models(_: None = Depends(authenticate_client)):
    # Authenticated OpenAI-compatible model listing.
    return get_models_list_response()

@app.get("/models", response_model=ModelList)
async def list_models_no_auth():
    # NOTE(review): this alias deliberately skips authentication —
    # confirm that exposing the model list unauthenticated is intended.
    return get_models_list_response()
175
+
176
+ # ... (all remaining routes and core functions match the original file;
+ # they are listed in full below)
177
+ # --- The rest of the original code follows ---
178
+ # This includes _codegeex_stream_generator, _build_codegeex_non_stream_response, chat_completions, etc.
179
+ # They are included in the full code block above.
180
def _codegeex_stream_generator(response, model: str):
    """Re-emit a CodeGeeX SSE response as OpenAI chat.completion.chunk events.

    Parses upstream `event:`/`data:` blocks separated by blank lines:
    `add` events carry incremental text; `finish` ends the stream.
    Always terminates with the OpenAI-style `[DONE]` sentinel.
    """
    stream_id = f"chatcmpl-{uuid.uuid4().hex}"
    created_time = int(time.time())
    # Leading chunk announces the assistant role, matching OpenAI's stream shape.
    yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={'role': 'assistant'})]).json()}\n\n"
    buffer = ""
    try:
        for chunk in response.iter_content(chunk_size=1024):
            if not chunk: continue
            buffer += chunk.decode("utf-8", errors='ignore')
            # Process every complete (blank-line-terminated) SSE event in the buffer.
            while "\n\n" in buffer:
                event_data, buffer = buffer.split("\n\n", 1)
                event_data = event_data.strip()
                if not event_data: continue
                event_type, data_json = None, None
                for line in event_data.split("\n"):
                    if line.startswith("event:"): event_type = line[6:].strip()
                    elif line.startswith("data:"):
                        # BUG FIX: a bare `except:` also swallowed SystemExit /
                        # KeyboardInterrupt; only malformed JSON should be skipped.
                        try: data_json = json.loads(line[5:].strip())
                        except (ValueError, TypeError): continue
                if not event_type or not data_json: continue
                if event_type == "add":
                    delta = data_json.get("text", "")
                    if delta: yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={'content': delta})]).json()}\n\n"
                elif event_type == "finish":
                    yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={}, finish_reason='stop')]).json()}\n\n"
                    yield "data: [DONE]\n\n"
                    return
    except Exception as e:
        log_debug(f"Stream processing error: {e}")
        yield f"data: {json.dumps({'error': str(e)})}\n\n"
    # Fallback termination when upstream never sent a `finish` event.
    yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={}, finish_reason='stop')]).json()}\n\n"
    yield "data: [DONE]\n\n"
212
+
213
def _build_codegeex_non_stream_response(response, model: str) -> ChatCompletionResponse:
    """Accumulate a CodeGeeX SSE response into a single chat.completion object.

    `add` events are concatenated into the content. A non-empty `finish`
    payload replaces the accumulated text — NOTE(review): this assumes the
    finish event carries the full final message; confirm against upstream.
    """
    full_content = ""
    buffer = ""
    for chunk in response.iter_content(chunk_size=1024):
        if not chunk: continue
        buffer += chunk.decode("utf-8", errors='ignore')
        while "\n\n" in buffer:
            event_data, buffer = buffer.split("\n\n", 1)
            event_data = event_data.strip()
            if not event_data: continue
            event_type, data_json = None, None
            for line in event_data.split("\n"):
                if line.startswith("event:"): event_type = line[6:].strip()
                elif line.startswith("data:"):
                    # BUG FIX: a bare `except:` also swallowed SystemExit /
                    # KeyboardInterrupt; only malformed JSON should be skipped.
                    try: data_json = json.loads(line[5:].strip())
                    except (ValueError, TypeError): continue
            if not event_type or not data_json: continue
            if event_type == "add": full_content += data_json.get("text", "")
            elif event_type == "finish":
                finish_text = data_json.get("text", "")
                if finish_text: full_content = finish_text
                return ChatCompletionResponse(model=model, choices=[ChatCompletionChoice(message=ChatMessage(role="assistant", content=full_content))])
    # Upstream closed without a `finish` event: return whatever accumulated.
    return ChatCompletionResponse(model=model, choices=[ChatCompletionChoice(message=ChatMessage(role="assistant", content=full_content))])
236
+
237
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest, _: None = Depends(authenticate_client)):
    # OpenAI-compatible chat endpoint: validates the request, converts the
    # message list to CodeGeeX format, then tries tokens until one succeeds.
    # NOTE(review): `requests.post` is a blocking call inside an async def —
    # it stalls the event loop for the duration of the upstream request;
    # consider httpx.AsyncClient or a threadpool before relying on this
    # under concurrent load.
    if request.model not in CODEGEEX_MODELS: raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found.")
    if not request.messages: raise HTTPException(status_code=400, detail="No messages provided.")
    try: prompt, history = _convert_messages_to_codegeex_format(request.messages)
    except Exception as e: raise HTTPException(status_code=400, detail=f"Failed to process messages: {e}")
    # The extra loop iteration serves as the "all attempts failed" exit.
    for attempt in range(len(CODEGEEX_TOKENS) + 1):
        if attempt == len(CODEGEEX_TOKENS): raise HTTPException(status_code=503, detail="All attempts to contact CodeGeeX API failed.")
        token = get_best_codegeex_token()
        if not token: raise HTTPException(status_code=503, detail="No valid CodeGeeX tokens available.")
        try:
            # Payload presumably mirrors the CodeGeeX IDE plugin's request
            # shape (user_role/ide/talkId etc.) — TODO confirm upstream contract.
            payload = {"user_role": 0, "ide": "VSCode", "prompt": prompt, "model": request.model, "history": history, "talkId": f"{uuid.uuid4()}", "plugin_version": "", "locale": "", "agent": None, "candidates": {"candidate_msg_id": "", "candidate_type": "", "selected_candidate": ""}, "ide_version": "", "machineId": ""}
            headers = {"User-Agent": "Mozilla/5.0", "Accept": "text/event-stream", "Content-Type": "application/json", "code-token": token["token"]}
            response = requests.post("https://codegeex.cn/prod/code/chatCodeSseV3/chat", data=json.dumps(payload), headers=headers, stream=True, timeout=300.0)
            response.raise_for_status()
            if request.stream: return StreamingResponse(_codegeex_stream_generator(response, request.model), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"})
            else: return _build_codegeex_non_stream_response(response, request.model)
        except requests.HTTPError as e:
            status_code = getattr(e.response, "status_code", 500)
            with token_rotation_lock:
                # Auth failures permanently retire the token; transient
                # upstream errors only bump its error counter.
                if status_code in [401, 403]: token["is_valid"] = False
                elif status_code in [429, 500, 502, 503, 504]: token["error_count"] += 1
        except Exception as e:
            # Network-level failure: penalize the token and try the next one.
            with token_rotation_lock: token["error_count"] += 1