Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -253,18 +253,17 @@ def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None):
|
|
| 253 |
|
| 254 |
|
| 255 |
def stream_onyx_response(payload, model, session_key):
|
| 256 |
-
final_message_id = None
|
| 257 |
"""Stream response from Onyx API in OpenAI SSE format"""
|
|
|
|
|
|
|
| 258 |
chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
# Try alternate endpoints if needed
|
| 262 |
endpoints = [
|
| 263 |
f"{ONYX_BASE_URL}/api/chat/send-chat-message", # Primary (new)
|
| 264 |
-
f"{ONYX_BASE_URL}/api/chat/send-message", # Fallback
|
| 265 |
]
|
| 266 |
-
|
| 267 |
-
#
|
| 268 |
initial_chunk = {
|
| 269 |
"id": chunk_id,
|
| 270 |
"object": "chat.completion.chunk",
|
|
@@ -272,96 +271,73 @@ def stream_onyx_response(payload, model, session_key):
|
|
| 272 |
"model": model,
|
| 273 |
"choices": [{
|
| 274 |
"index": 0,
|
| 275 |
-
"delta": {"role": "assistant"
|
| 276 |
"finish_reason": None
|
| 277 |
}]
|
| 278 |
}
|
| 279 |
yield f"data: {json.dumps(initial_chunk)}\n\n"
|
| 280 |
-
|
| 281 |
last_message_id = None
|
| 282 |
-
|
| 283 |
for url in endpoints:
|
| 284 |
try:
|
| 285 |
print(f"Trying endpoint: {url}")
|
| 286 |
-
|
| 287 |
-
|
| 288 |
with requests.post(
|
| 289 |
-
url,
|
| 290 |
-
json=payload,
|
| 291 |
-
headers=get_headers(),
|
| 292 |
stream=True,
|
| 293 |
timeout=120
|
| 294 |
) as response:
|
| 295 |
-
|
| 296 |
print(f"Response status: {response.status_code}")
|
| 297 |
-
|
| 298 |
-
if response.status_code == 404:
|
| 299 |
-
continue # Try next endpoint
|
| 300 |
-
|
| 301 |
if response.status_code != 200:
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
yield generate_openai_stream_chunk(
|
| 305 |
-
f"Error {response.status_code}: {error_text}",
|
| 306 |
-
model, chunk_id
|
| 307 |
-
)
|
| 308 |
-
yield generate_openai_stream_chunk("", model, chunk_id, "stop")
|
| 309 |
-
yield "data: [DONE]\n\n"
|
| 310 |
-
return
|
| 311 |
-
|
| 312 |
buffer = ""
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
if content and packet_type in ['content', 'legacy', 'raw', 'error']:
|
| 347 |
-
yield generate_openai_stream_chunk(content, model, chunk_id)
|
| 348 |
-
|
| 349 |
-
# Update session with last message ID
|
| 350 |
-
if session_key in chat_sessions_cache and last_message_id:
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
break # Success, exit loop
|
| 354 |
-
|
| 355 |
-
except requests.exceptions.RequestException as e:
|
| 356 |
-
print(f"Request error for {url}: {e}")
|
| 357 |
continue
|
| 358 |
-
|
| 359 |
-
#
|
|
|
|
|
|
|
|
|
|
| 360 |
yield generate_openai_stream_chunk("", model, chunk_id, "stop")
|
| 361 |
yield "data: [DONE]\n\n"
|
| 362 |
-
if final_message_id:
|
| 363 |
-
chat_sessions_cache[session_key]['parent_message_id'] = final_message_id
|
| 364 |
-
|
| 365 |
|
| 366 |
def collect_full_response(payload, model, session_key):
|
| 367 |
"""Collect full streaming response and return as complete OpenAI response"""
|
|
|
|
def stream_onyx_response(payload, model, session_key):
    """Stream a chat response from the Onyx API as OpenAI-style SSE chunks.

    Args:
        payload: JSON body forwarded to the Onyx send-message endpoint.
        model: Model name echoed back in every OpenAI-format chunk.
        session_key: Key into ``chat_sessions_cache``; after the stream ends,
            the final assistant message id is stored there as
            ``parent_message_id`` so the next turn threads onto this reply.

    Yields:
        ``data: <json>`` SSE strings in OpenAI ``chat.completion.chunk``
        format, followed by a stop chunk and a final ``data: [DONE]`` line.
    """
    final_message_id = None  # set once Onyx signals a "stop" packet
    chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

    # Try the primary endpoint first; older deployments only expose the
    # fallback path, so any non-200 response moves on to the next URL.
    endpoints = [
        f"{ONYX_BASE_URL}/api/chat/send-chat-message",  # Primary (new)
        f"{ONYX_BASE_URL}/api/chat/send-message",  # Fallback
    ]

    # Initial chunk announcing the assistant role, per the OpenAI
    # streaming convention.
    initial_chunk = {
        "id": chunk_id,
        "object": "chat.completion.chunk",
        # NOTE(review): the upstream file has one more field here (hidden
        # diff-context line 270, presumably the "created" timestamp) —
        # restore it from app.py when merging.
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {"role": "assistant"},
            "finish_reason": None
        }]
    }
    yield f"data: {json.dumps(initial_chunk)}\n\n"

    last_message_id = None

    for url in endpoints:
        try:
            print(f"Trying endpoint: {url}")

            with requests.post(
                url,
                json=payload,
                headers=get_headers(),
                stream=True,
                timeout=120
            ) as response:

                print(f"Response status: {response.status_code}")

                if response.status_code != 200:
                    continue  # try the next endpoint

                buffer = ""
                stopped = False  # set when Onyx signals end-of-answer

                for chunk in response.iter_content(decode_unicode=True):
                    if not chunk:
                        continue

                    buffer += chunk

                    # Onyx streams newline-delimited packets; keep any
                    # partial trailing line buffered until it completes.
                    while '\n' in buffer:
                        line, buffer = buffer.split('\n', 1)
                        line = line.strip()

                        if not line or line == "[DONE]":
                            continue

                        if line.startswith("data: "):
                            line = line[6:]

                        content, msg_id, packet_type = parse_onyx_stream_chunk(line)

                        if msg_id:
                            last_message_id = msg_id

                        if packet_type == "content" and content:
                            yield generate_openai_stream_chunk(content, model, chunk_id)

                        if packet_type == "stop":
                            final_message_id = last_message_id
                            stopped = True
                            break

                    # BUG FIX: the break above only left the inner line loop;
                    # without this guard the code kept reading iter_content()
                    # after the answer was already complete.
                    if stopped:
                        break

                break  # success → exit endpoint loop

        except Exception as e:
            # Best-effort endpoint fallback: a failure here moves on to the
            # next candidate URL instead of aborting the whole stream.
            print("Stream error:", e)
            continue

    # Persist the FINAL assistant message id so the session threads the next
    # user turn onto this reply.
    if final_message_id and session_key in chat_sessions_cache:
        chat_sessions_cache[session_key]["parent_message_id"] = final_message_id

    yield generate_openai_stream_chunk("", model, chunk_id, "stop")
    yield "data: [DONE]\n\n"
| 341 |
|
| 342 |
def collect_full_response(payload, model, session_key):
|
| 343 |
"""Collect full streaming response and return as complete OpenAI response"""
|