Hiren122 commited on
Commit
1fa3299
·
verified ·
1 Parent(s): b6511a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -79
app.py CHANGED
def stream_onyx_response(payload, model, session_key):
    """Stream a chat response from the Onyx API as OpenAI-style SSE chunks.

    Args:
        payload: JSON body forwarded to the Onyx send-message endpoint.
        model: Model name echoed back in every OpenAI-format chunk.
        session_key: Key into ``chat_sessions_cache``; on success the final
            assistant message id is stored there as ``parent_message_id``
            so the next turn can chain onto this message.

    Yields:
        ``"data: {...}\\n\\n"`` SSE strings in OpenAI ``chat.completion.chunk``
        format, terminated by an empty stop chunk and ``"data: [DONE]\\n\\n"``.
    """
    final_message_id = None   # id reported by the terminal 'stop' packet
    last_message_id = None    # most recent message id seen in the stream
    chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

    # Try the current endpoint first; fall back to the deprecated one.
    endpoints = [
        f"{ONYX_BASE_URL}/api/chat/send-chat-message",  # Primary (new)
        f"{ONYX_BASE_URL}/api/chat/send-message",       # Fallback (deprecated)
    ]

    # Announce the assistant role up front, per the OpenAI streaming protocol.
    # NOTE(review): the "created" line sat in hidden diff context — standard
    # chunk shape reconstructed here; confirm against generate_openai_stream_chunk.
    initial_chunk = {
        "id": chunk_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {"role": "assistant"},
            "finish_reason": None
        }]
    }
    yield f"data: {json.dumps(initial_chunk)}\n\n"

    for url in endpoints:
        try:
            print(f"Trying endpoint: {url}")

            with requests.post(
                url,
                json=payload,
                headers=get_headers(),
                stream=True,
                timeout=120
            ) as response:
                print(f"Response status: {response.status_code}")

                if response.status_code != 200:
                    continue  # try the next endpoint

                buffer = ""
                stream_done = False  # set once Onyx sends its 'stop' packet

                # chunk_size=None yields data as it arrives; the default
                # (1) would iterate the body one byte at a time.
                for chunk in response.iter_content(chunk_size=None,
                                                   decode_unicode=True):
                    if not chunk:
                        continue
                    buffer += chunk

                    # Drain every complete line currently in the buffer.
                    while '\n' in buffer:
                        line, buffer = buffer.split('\n', 1)
                        line = line.strip()
                        if not line or line == "[DONE]":
                            continue
                        if line.startswith("data: "):
                            line = line[6:]

                        content, msg_id, packet_type = parse_onyx_stream_chunk(line)
                        if msg_id:
                            last_message_id = msg_id
                        if packet_type == "content" and content:
                            yield generate_openai_stream_chunk(content, model, chunk_id)
                        if packet_type == "stop":
                            # Remember the id of the completed assistant
                            # message for session chaining.
                            final_message_id = last_message_id
                            stream_done = True
                            break  # exits the line loop only

                    if stream_done:
                        # FIX: without this the outer loop kept reading the
                        # socket after the 'stop' packet.
                        break

                # A final packet without a trailing newline would still be
                # sitting in the buffer — don't drop it.
                tail = buffer.strip()
                if tail and not stream_done:
                    if tail.startswith("data: "):
                        tail = tail[6:].strip()
                    if tail and tail != "[DONE]":
                        content, msg_id, packet_type = parse_onyx_stream_chunk(tail)
                        if msg_id:
                            last_message_id = msg_id
                        if packet_type == "content" and content:
                            yield generate_openai_stream_chunk(content, model, chunk_id)
                        if packet_type == "stop":
                            final_message_id = last_message_id

                break  # this endpoint worked — skip the fallback

        except requests.exceptions.RequestException as e:
            # Network-level failure only: log and try the next endpoint.
            # (A broad `except Exception` here would silently retry on
            # programming errors too.)
            print(f"Request error for {url}: {e}")
            continue

    # Persist the final assistant message id so follow-up turns in this
    # session continue the conversation from the right parent message.
    if final_message_id and session_key in chat_sessions_cache:
        chat_sessions_cache[session_key]["parent_message_id"] = final_message_id

    yield generate_openai_stream_chunk("", model, chunk_id, "stop")
    yield "data: [DONE]\n\n"
 
 
 
341
 
342
  def collect_full_response(payload, model, session_key):
343
  """Collect full streaming response and return as complete OpenAI response"""