bluewinliang commited on
Commit
16171fa
·
verified ·
1 Parent(s): bf3f212

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +155 -115
proxy_handler.py CHANGED
@@ -1,6 +1,7 @@
1
  """
2
  Proxy handler for Z.AI API requests
3
  """
 
4
  import json
5
  import logging
6
  import re
@@ -12,20 +13,25 @@ from fastapi.responses import StreamingResponse
12
 
13
  from config import settings
14
  from cookie_manager import cookie_manager
15
- from models import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse
 
 
 
 
16
 
17
  logger = logging.getLogger(__name__)
18
 
 
19
  class ProxyHandler:
20
  def __init__(self):
21
  self.client = httpx.AsyncClient(timeout=60.0)
22
-
23
  async def __aenter__(self):
24
  return self
25
-
26
  async def __aexit__(self, exc_type, exc_val, exc_tb):
27
  await self.client.aclose()
28
-
29
  def transform_content(self, content: str) -> str:
30
  """Transform content by replacing HTML tags and optionally removing think tags"""
31
  if not content:
@@ -39,79 +45,95 @@ class ProxyHandler:
39
  original_length = len(content)
40
 
41
  # Remove <details> blocks (thinking content) - handle both closed and unclosed tags
42
- content = re.sub(r'<details[^>]*>.*?</details>', '', content, flags=re.DOTALL)
43
- content = re.sub(r'<details[^>]*>.*?(?=\s*[A-Z]|\s*\d|\s*$)', '', content, flags=re.DOTALL)
 
 
 
 
 
 
 
44
  content = content.strip()
45
 
46
- logger.debug(f"Content length after removing thinking content: {original_length} -> {len(content)}")
 
 
47
  else:
48
  logger.debug("Keeping thinking content, converting to <think> tags")
49
 
50
  # Replace <details> with <think>
51
- content = re.sub(r'<details[^>]*>', '<think>', content)
52
- content = content.replace('</details>', '</think>')
53
  # Remove <summary> tags and their content
54
- content = re.sub(r'<summary>.*?</summary>', '', content, flags=re.DOTALL)
55
 
56
  # If there's no closing </think>, add it at the end of thinking content
57
- if '<think>' in content and '</think>' not in content:
58
- think_start = content.find('<think>')
59
  if think_start != -1:
60
- answer_match = re.search(r'\n\s*[A-Z0-9]', content[think_start:])
61
  if answer_match:
62
  insert_pos = think_start + answer_match.start()
63
- content = content[:insert_pos] + '</think>\n' + content[insert_pos:]
 
 
64
  else:
65
- content += '</think>'
66
 
67
  return content.strip()
68
-
69
  def transform_delta_content(self, content: str) -> str:
70
  """Transform delta content for streaming"""
71
  if not content:
72
  return content
73
-
74
  # Convert <details> to <think> and remove summary tags
75
- content = re.sub(r'<details[^>]*>', '<think>', content)
76
- content = content.replace('</details>', '</think>')
77
- content = re.sub(r'<summary>.*?</summary>', '', content, flags=re.DOTALL)
78
-
79
  return content
80
-
81
  async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
82
  """Proxy request to Z.AI API"""
 
83
  cookie = await cookie_manager.get_next_cookie()
84
  if not cookie:
85
  raise HTTPException(status_code=503, detail="No available cookies")
86
-
87
  # Transform model name
88
- target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model
89
-
 
 
 
 
90
  # Build request data based on the actual Z.AI API format
91
  import uuid
92
  from datetime import datetime
93
 
94
  current_time = datetime.now()
95
-
96
  # Generate unique IDs for the request
97
  chat_id = str(uuid.uuid4())
98
  request_id = str(uuid.uuid4())
99
-
100
  # Transform messages to include message_id
101
  messages_with_ids = []
102
  for msg in request.model_dump()["messages"]:
103
  message_with_id = {
104
  **msg,
105
- "message_id": str(uuid.uuid4()) # Add message_id to each message
106
  }
107
  messages_with_ids.append(message_with_id)
108
-
109
  request_data = {
110
  "stream": True,
111
  "model": target_model,
112
  "messages": messages_with_ids, # Use messages with IDs
113
  "chat_id": chat_id, # Add chat_id
114
- "id": request_id, # Add request ID
115
  "params": {},
116
  "tool_servers": [],
117
  "features": {
@@ -124,9 +146,9 @@ class ProxyHandler:
124
  "features": [
125
  {"type": "mcp", "server": "vibe-coding", "status": "hidden"},
126
  {"type": "mcp", "server": "ppt-maker", "status": "hidden"},
127
- {"type": "mcp", "server": "image-search", "status": "hidden"}
128
  ],
129
- "enable_thinking": True
130
  },
131
  "variables": {
132
  "{{USER_NAME}}": "User",
@@ -136,7 +158,7 @@ class ProxyHandler:
136
  "{{CURRENT_TIME}}": current_time.strftime("%H:%M:%S"),
137
  "{{CURRENT_WEEKDAY}}": current_time.strftime("%A"),
138
  "{{CURRENT_TIMEZONE}}": "Asia/Taipei",
139
- "{{USER_LANGUAGE}}": "zh-CN"
140
  },
141
  "model_item": {
142
  "id": target_model,
@@ -147,7 +169,7 @@ class ProxyHandler:
147
  "name": target_model,
148
  "owned_by": "openai",
149
  "openai": {"id": target_model},
150
- "urlIdx": 1
151
  },
152
  "urlIdx": 1,
153
  "info": {
@@ -155,11 +177,7 @@ class ProxyHandler:
155
  "user_id": "7080a6c5-5fcc-4ea4-a85f-3b3fac905cf2",
156
  "base_model_id": None,
157
  "name": "GLM-4.5",
158
- "params": {
159
- "top_p": 0.95,
160
- "temperature": 0.6,
161
- "max_tokens": 80000
162
- },
163
  "meta": {
164
  "profile_image_url": "/static/favicon.png",
165
  "description": "Most advanced model, proficient in coding and tool use",
@@ -174,16 +192,21 @@ class ProxyHandler:
174
  "file_qa": True,
175
  "returnFc": True,
176
  "returnThink": True,
177
- "think": True
178
  },
179
- "mcpServerIds": ["deep-web-search", "ppt-maker", "image-search", "vibe-coding"]
180
- }
181
- }
182
- }
 
 
 
 
 
183
  }
184
 
185
  logger.debug(f"Sending request data: {json.dumps(request_data, indent=2)}")
186
-
187
  # Use the exact headers from your curl request
188
  headers = {
189
  "Accept": "*/*",
@@ -201,63 +224,70 @@ class ProxyHandler:
201
  "X-FE-Version": "prod-fe-1.0.57",
202
  "sec-ch-ua": '"Chromium";v="137", "Not/A)Brand";v="24"',
203
  "sec-ch-ua-mobile": "?1",
204
- "sec-ch-ua-platform": '"Android"'
205
  }
206
-
207
  try:
208
  response = await self.client.post(
209
- settings.UPSTREAM_URL,
210
- json=request_data,
211
- headers=headers
212
  )
213
-
214
  if response.status_code == 401:
215
  await cookie_manager.mark_cookie_failed(cookie)
216
  raise HTTPException(status_code=401, detail="Invalid authentication")
217
-
218
  if response.status_code != 200:
219
  logger.error(f"Upstream error: {response.status_code} - {response.text}")
220
- raise HTTPException(status_code=response.status_code, detail=f"Upstream error: {response.text}")
221
-
 
 
 
222
  await cookie_manager.mark_cookie_success(cookie)
223
  return {"response": response, "cookie": cookie}
224
-
225
  except httpx.RequestError as e:
226
  logger.error(f"Request error: {e}")
227
  await cookie_manager.mark_cookie_failed(cookie)
228
  raise HTTPException(status_code=503, detail="Upstream service unavailable")
229
-
230
- async def process_streaming_response_real_time(self, response: httpx.Response) -> AsyncGenerator[Dict[str, Any], None]:
 
 
231
  """Process streaming response in real time - truly streaming"""
232
  buffer = ""
233
-
234
  async for chunk in response.aiter_text():
235
  if not chunk:
236
  continue
237
-
238
  buffer += chunk
239
- lines = buffer.split('\n')
240
  buffer = lines[-1] # Keep incomplete line in buffer
241
-
242
  for line in lines[:-1]:
243
  line = line.strip()
244
  if not line.startswith("data: "):
245
  continue
246
-
247
  payload = line[6:].strip()
248
  if payload == "[DONE]":
249
  return
250
-
251
  try:
252
  parsed = json.loads(payload)
253
 
254
  # Check for errors first
255
  if parsed.get("error") or (parsed.get("data", {}).get("error")):
256
- error_detail = (parsed.get("error", {}).get("detail") or
257
- parsed.get("data", {}).get("error", {}).get("detail") or
258
- "Unknown error from upstream")
 
 
259
  logger.error(f"Upstream error: {error_detail}")
260
- raise HTTPException(status_code=400, detail=f"Upstream error: {error_detail}")
 
 
261
 
262
  # Transform the response immediately
263
  if parsed.get("data"):
@@ -275,16 +305,20 @@ class ProxyHandler:
275
  yield parsed
276
 
277
  except json.JSONDecodeError as e:
278
- logger.debug(f"JSON decode error for line: {line[:100]}... Error: {e}")
 
 
279
  continue # Skip non-JSON lines
280
-
281
  async def handle_chat_completion(self, request: ChatCompletionRequest):
282
  """Handle chat completion request"""
283
  proxy_result = await self.proxy_request(request)
284
  response = proxy_result["response"]
285
 
286
  # Determine final streaming mode
287
- is_streaming = request.stream if request.stream is not None else settings.DEFAULT_STREAM
 
 
288
 
289
  if is_streaming:
290
  return StreamingResponse(
@@ -293,19 +327,21 @@ class ProxyHandler:
293
  headers={
294
  "Cache-Control": "no-cache",
295
  "Connection": "keep-alive",
296
- }
297
  )
298
  else:
299
  return await self.non_stream_response(response, request.model)
300
 
301
- async def stream_response_real_time(self, response: httpx.Response, model: str) -> AsyncGenerator[str, None]:
 
 
302
  """Generate truly real-time streaming response in OpenAI format"""
303
  import uuid
304
  import time
305
-
306
  # Generate a unique completion ID
307
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
308
-
309
  try:
310
  # Process each chunk immediately as it arrives - true streaming!
311
  async for parsed in self.process_streaming_response_real_time(response):
@@ -313,12 +349,18 @@ class ProxyHandler:
313
  data = parsed.get("data", {})
314
  delta_content = data.get("delta_content", "")
315
  phase = data.get("phase", "")
316
-
317
  # For SHOW_THINK_TAGS=false, filter out non-answer content
318
- if not settings.SHOW_THINK_TAGS and phase != "answer" and delta_content:
319
- logger.debug(f"Skipping content in {phase} phase (SHOW_THINK_TAGS=false)")
 
 
 
 
 
 
320
  continue
321
-
322
  # Send content immediately if available
323
  if delta_content:
324
  openai_chunk = {
@@ -326,58 +368,53 @@ class ProxyHandler:
326
  "object": "chat.completion.chunk",
327
  "created": int(time.time()),
328
  "model": model,
329
- "choices": [{
330
- "index": 0,
331
- "delta": {
332
- "content": delta_content
333
- },
334
- "finish_reason": None
335
- }]
336
  }
337
-
338
  chunk_json = json.dumps(openai_chunk)
339
  yield f"data: {chunk_json}\n\n"
340
  logger.debug(f"Sent chunk: {chunk_json[:100]}...")
341
-
342
  except Exception as e:
343
  logger.error(f"Error processing streaming chunk: {e}")
344
  continue
345
-
346
  # Send final completion chunk
347
  final_chunk = {
348
  "id": completion_id,
349
- "object": "chat.completion.chunk",
350
  "created": int(time.time()),
351
  "model": model,
352
- "choices": [{
353
- "index": 0,
354
- "delta": {},
355
- "finish_reason": "stop"
356
- }]
357
  }
358
-
359
  yield f"data: {json.dumps(final_chunk)}\n\n"
360
  yield "data: [DONE]\n\n"
361
-
362
  except Exception as e:
363
  logger.error(f"Streaming error: {e}")
364
  # Send error in OpenAI format
365
- error_chunk = {
366
- "error": {
367
- "message": str(e),
368
- "type": "server_error"
369
- }
370
- }
371
  yield f"data: {json.dumps(error_chunk)}\n\n"
372
-
373
- async def non_stream_response(self, response: httpx.Response, model: str) -> ChatCompletionResponse:
 
 
374
  """Generate non-streaming response by collecting all chunks"""
375
  chunks = []
376
-
377
  # For non-streaming, we still collect all chunks first
378
  async for parsed in self.process_streaming_response_real_time(response):
379
  chunks.append(parsed)
380
- logger.debug(f"Collected chunk: {parsed.get('data', {}).get('delta_content', '')[:50]}...")
 
 
381
 
382
  if not chunks:
383
  raise HTTPException(status_code=500, detail="No response from upstream")
@@ -405,15 +442,18 @@ class ProxyHandler:
405
 
406
  # Create OpenAI-compatible response
407
  return ChatCompletionResponse(
408
- id=chunks[0].get("data", {}).get("id", "chatcmpl-unknown") if chunks else "chatcmpl-unknown",
 
 
 
 
409
  created=int(time.time()),
410
  model=model,
411
- choices=[{
412
- "index": 0,
413
- "message": {
414
- "role": "assistant",
415
- "content": transformed_content
416
- },
417
- "finish_reason": "stop"
418
- }]
419
  )
 
1
  """
2
  Proxy handler for Z.AI API requests
3
  """
4
+
5
  import json
6
  import logging
7
  import re
 
13
 
14
  from config import settings
15
  from cookie_manager import cookie_manager
16
+ from models import (
17
+ ChatCompletionRequest,
18
+ ChatCompletionResponse,
19
+ ChatCompletionStreamResponse,
20
+ )
21
 
22
  logger = logging.getLogger(__name__)
23
 
24
+
25
  class ProxyHandler:
26
  def __init__(self):
27
  self.client = httpx.AsyncClient(timeout=60.0)
28
+
29
  async def __aenter__(self):
30
  return self
31
+
32
  async def __aexit__(self, exc_type, exc_val, exc_tb):
33
  await self.client.aclose()
34
+
35
  def transform_content(self, content: str) -> str:
36
  """Transform content by replacing HTML tags and optionally removing think tags"""
37
  if not content:
 
45
  original_length = len(content)
46
 
47
  # Remove <details> blocks (thinking content) - handle both closed and unclosed tags
48
+ content = re.sub(
49
+ r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL
50
+ )
51
+ content = re.sub(
52
+ r"<details[^>]*>.*?(?=\s*[A-Z]|\s*\d|\s*$)",
53
+ "",
54
+ content,
55
+ flags=re.DOTALL,
56
+ )
57
  content = content.strip()
58
 
59
+ logger.debug(
60
+ f"Content length after removing thinking content: {original_length} -> {len(content)}"
61
+ )
62
  else:
63
  logger.debug("Keeping thinking content, converting to <think> tags")
64
 
65
  # Replace <details> with <think>
66
+ content = re.sub(r"<details[^>]*>", "<think>", content)
67
+ content = content.replace("</details>", "</think>")
68
  # Remove <summary> tags and their content
69
+ content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
70
 
71
  # If there's no closing </think>, add it at the end of thinking content
72
+ if "<think>" in content and "</think>" not in content:
73
+ think_start = content.find("<think>")
74
  if think_start != -1:
75
+ answer_match = re.search(r"\n\s*[A-Z0-9]", content[think_start:])
76
  if answer_match:
77
  insert_pos = think_start + answer_match.start()
78
+ content = (
79
+ content[:insert_pos] + "</think>\n" + content[insert_pos:]
80
+ )
81
  else:
82
+ content += "</think>"
83
 
84
  return content.strip()
85
+
86
  def transform_delta_content(self, content: str) -> str:
87
  """Transform delta content for streaming"""
88
  if not content:
89
  return content
90
+
91
  # Convert <details> to <think> and remove summary tags
92
+ content = re.sub(r"<details[^>]*>", "<think>", content)
93
+ content = content.replace("</details>", "</think>")
94
+ content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
95
+
96
  return content
97
+
98
  async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
99
  """Proxy request to Z.AI API"""
100
+
101
  cookie = await cookie_manager.get_next_cookie()
102
  if not cookie:
103
  raise HTTPException(status_code=503, detail="No available cookies")
104
+
105
  # Transform model name
106
+ target_model = (
107
+ settings.UPSTREAM_MODEL
108
+ if request.model == settings.MODEL_NAME
109
+ else request.model
110
+ )
111
+
112
  # Build request data based on the actual Z.AI API format
113
  import uuid
114
  from datetime import datetime
115
 
116
  current_time = datetime.now()
117
+
118
  # Generate unique IDs for the request
119
  chat_id = str(uuid.uuid4())
120
  request_id = str(uuid.uuid4())
121
+
122
  # Transform messages to include message_id
123
  messages_with_ids = []
124
  for msg in request.model_dump()["messages"]:
125
  message_with_id = {
126
  **msg,
127
+ "message_id": str(uuid.uuid4()), # Add message_id to each message
128
  }
129
  messages_with_ids.append(message_with_id)
130
+
131
  request_data = {
132
  "stream": True,
133
  "model": target_model,
134
  "messages": messages_with_ids, # Use messages with IDs
135
  "chat_id": chat_id, # Add chat_id
136
+ "id": request_id, # Add request ID
137
  "params": {},
138
  "tool_servers": [],
139
  "features": {
 
146
  "features": [
147
  {"type": "mcp", "server": "vibe-coding", "status": "hidden"},
148
  {"type": "mcp", "server": "ppt-maker", "status": "hidden"},
149
+ {"type": "mcp", "server": "image-search", "status": "hidden"},
150
  ],
151
+ "enable_thinking": True,
152
  },
153
  "variables": {
154
  "{{USER_NAME}}": "User",
 
158
  "{{CURRENT_TIME}}": current_time.strftime("%H:%M:%S"),
159
  "{{CURRENT_WEEKDAY}}": current_time.strftime("%A"),
160
  "{{CURRENT_TIMEZONE}}": "Asia/Taipei",
161
+ "{{USER_LANGUAGE}}": "zh-CN",
162
  },
163
  "model_item": {
164
  "id": target_model,
 
169
  "name": target_model,
170
  "owned_by": "openai",
171
  "openai": {"id": target_model},
172
+ "urlIdx": 1,
173
  },
174
  "urlIdx": 1,
175
  "info": {
 
177
  "user_id": "7080a6c5-5fcc-4ea4-a85f-3b3fac905cf2",
178
  "base_model_id": None,
179
  "name": "GLM-4.5",
180
+ "params": {"top_p": 0.95, "temperature": 0.6, "max_tokens": 80000},
 
 
 
 
181
  "meta": {
182
  "profile_image_url": "/static/favicon.png",
183
  "description": "Most advanced model, proficient in coding and tool use",
 
192
  "file_qa": True,
193
  "returnFc": True,
194
  "returnThink": True,
195
+ "think": True,
196
  },
197
+ "mcpServerIds": [
198
+ "deep-web-search",
199
+ "ppt-maker",
200
+ "image-search",
201
+ "vibe-coding",
202
+ ],
203
+ },
204
+ },
205
+ },
206
  }
207
 
208
  logger.debug(f"Sending request data: {json.dumps(request_data, indent=2)}")
209
+
210
  # Use the exact headers from your curl request
211
  headers = {
212
  "Accept": "*/*",
 
224
  "X-FE-Version": "prod-fe-1.0.57",
225
  "sec-ch-ua": '"Chromium";v="137", "Not/A)Brand";v="24"',
226
  "sec-ch-ua-mobile": "?1",
227
+ "sec-ch-ua-platform": '"Android"',
228
  }
229
+
230
  try:
231
  response = await self.client.post(
232
+ settings.UPSTREAM_URL, json=request_data, headers=headers
 
 
233
  )
234
+
235
  if response.status_code == 401:
236
  await cookie_manager.mark_cookie_failed(cookie)
237
  raise HTTPException(status_code=401, detail="Invalid authentication")
238
+
239
  if response.status_code != 200:
240
  logger.error(f"Upstream error: {response.status_code} - {response.text}")
241
+ raise HTTPException(
242
+ status_code=response.status_code,
243
+ detail=f"Upstream error: {response.text}",
244
+ )
245
+
246
  await cookie_manager.mark_cookie_success(cookie)
247
  return {"response": response, "cookie": cookie}
248
+
249
  except httpx.RequestError as e:
250
  logger.error(f"Request error: {e}")
251
  await cookie_manager.mark_cookie_failed(cookie)
252
  raise HTTPException(status_code=503, detail="Upstream service unavailable")
253
+
254
+ async def process_streaming_response_real_time(
255
+ self, response: httpx.Response
256
+ ) -> AsyncGenerator[Dict[str, Any], None]:
257
  """Process streaming response in real time - truly streaming"""
258
  buffer = ""
259
+
260
  async for chunk in response.aiter_text():
261
  if not chunk:
262
  continue
263
+
264
  buffer += chunk
265
+ lines = buffer.split("\n")
266
  buffer = lines[-1] # Keep incomplete line in buffer
267
+
268
  for line in lines[:-1]:
269
  line = line.strip()
270
  if not line.startswith("data: "):
271
  continue
272
+
273
  payload = line[6:].strip()
274
  if payload == "[DONE]":
275
  return
276
+
277
  try:
278
  parsed = json.loads(payload)
279
 
280
  # Check for errors first
281
  if parsed.get("error") or (parsed.get("data", {}).get("error")):
282
+ error_detail = (
283
+ parsed.get("error", {}).get("detail")
284
+ or parsed.get("data", {}).get("error", {}).get("detail")
285
+ or "Unknown error from upstream"
286
+ )
287
  logger.error(f"Upstream error: {error_detail}")
288
+ raise HTTPException(
289
+ status_code=400, detail=f"Upstream error: {error_detail}"
290
+ )
291
 
292
  # Transform the response immediately
293
  if parsed.get("data"):
 
305
  yield parsed
306
 
307
  except json.JSONDecodeError as e:
308
+ logger.debug(
309
+ f"JSON decode error for line: {line[:100]}... Error: {e}"
310
+ )
311
  continue # Skip non-JSON lines
312
+
313
  async def handle_chat_completion(self, request: ChatCompletionRequest):
314
  """Handle chat completion request"""
315
  proxy_result = await self.proxy_request(request)
316
  response = proxy_result["response"]
317
 
318
  # Determine final streaming mode
319
+ is_streaming = (
320
+ request.stream if request.stream is not None else settings.DEFAULT_STREAM
321
+ )
322
 
323
  if is_streaming:
324
  return StreamingResponse(
 
327
  headers={
328
  "Cache-Control": "no-cache",
329
  "Connection": "keep-alive",
330
+ },
331
  )
332
  else:
333
  return await self.non_stream_response(response, request.model)
334
 
335
+ async def stream_response_real_time(
336
+ self, response: httpx.Response, model: str
337
+ ) -> AsyncGenerator[str, None]:
338
  """Generate truly real-time streaming response in OpenAI format"""
339
  import uuid
340
  import time
341
+
342
  # Generate a unique completion ID
343
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
344
+
345
  try:
346
  # Process each chunk immediately as it arrives - true streaming!
347
  async for parsed in self.process_streaming_response_real_time(response):
 
349
  data = parsed.get("data", {})
350
  delta_content = data.get("delta_content", "")
351
  phase = data.get("phase", "")
352
+
353
  # For SHOW_THINK_TAGS=false, filter out non-answer content
354
+ if (
355
+ not settings.SHOW_THINK_TAGS
356
+ and phase != "answer"
357
+ and delta_content
358
+ ):
359
+ logger.debug(
360
+ f"Skipping content in {phase} phase (SHOW_THINK_TAGS=false)"
361
+ )
362
  continue
363
+
364
  # Send content immediately if available
365
  if delta_content:
366
  openai_chunk = {
 
368
  "object": "chat.completion.chunk",
369
  "created": int(time.time()),
370
  "model": model,
371
+ "choices": [
372
+ {
373
+ "index": 0,
374
+ "delta": {"content": delta_content},
375
+ "finish_reason": None,
376
+ }
377
+ ],
378
  }
379
+
380
  chunk_json = json.dumps(openai_chunk)
381
  yield f"data: {chunk_json}\n\n"
382
  logger.debug(f"Sent chunk: {chunk_json[:100]}...")
383
+
384
  except Exception as e:
385
  logger.error(f"Error processing streaming chunk: {e}")
386
  continue
387
+
388
  # Send final completion chunk
389
  final_chunk = {
390
  "id": completion_id,
391
+ "object": "chat.completion.chunk",
392
  "created": int(time.time()),
393
  "model": model,
394
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
 
 
 
 
395
  }
396
+
397
  yield f"data: {json.dumps(final_chunk)}\n\n"
398
  yield "data: [DONE]\n\n"
399
+
400
  except Exception as e:
401
  logger.error(f"Streaming error: {e}")
402
  # Send error in OpenAI format
403
+ error_chunk = {"error": {"message": str(e), "type": "server_error"}}
 
 
 
 
 
404
  yield f"data: {json.dumps(error_chunk)}\n\n"
405
+
406
+ async def non_stream_response(
407
+ self, response: httpx.Response, model: str
408
+ ) -> ChatCompletionResponse:
409
  """Generate non-streaming response by collecting all chunks"""
410
  chunks = []
411
+
412
  # For non-streaming, we still collect all chunks first
413
  async for parsed in self.process_streaming_response_real_time(response):
414
  chunks.append(parsed)
415
+ logger.debug(
416
+ f"Collected chunk: {parsed.get('data', {}).get('delta_content', '')[:50]}..."
417
+ )
418
 
419
  if not chunks:
420
  raise HTTPException(status_code=500, detail="No response from upstream")
 
442
 
443
  # Create OpenAI-compatible response
444
  return ChatCompletionResponse(
445
+ id=(
446
+ chunks[0].get("data", {}).get("id", "chatcmpl-unknown")
447
+ if chunks
448
+ else "chatcmpl-unknown"
449
+ ),
450
  created=int(time.time()),
451
  model=model,
452
+ choices=[
453
+ {
454
+ "index": 0,
455
+ "message": {"role": "assistant", "content": transformed_content},
456
+ "finish_reason": "stop",
457
+ }
458
+ ],
 
459
  )