Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -253,18 +253,17 @@ def generate_openai_stream_chunk(content, model, chunk_id, finish_reason=None):
|
|
| 253 |
|
| 254 |
|
| 255 |
def stream_onyx_response(payload, model, session_key):
|
| 256 |
-
final_message_id = None
|
| 257 |
"""Stream response from Onyx API in OpenAI SSE format"""
|
|
|
|
|
|
|
| 258 |
chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
# Try alternate endpoints if needed
|
| 262 |
endpoints = [
|
| 263 |
f"{ONYX_BASE_URL}/api/chat/send-chat-message", # Primary (new)
|
| 264 |
-
f"{ONYX_BASE_URL}/api/chat/send-message", # Fallback
|
| 265 |
]
|
| 266 |
-
|
| 267 |
-
#
|
| 268 |
initial_chunk = {
|
| 269 |
"id": chunk_id,
|
| 270 |
"object": "chat.completion.chunk",
|
|
@@ -272,96 +271,73 @@ def stream_onyx_response(payload, model, session_key):
|
|
| 272 |
"model": model,
|
| 273 |
"choices": [{
|
| 274 |
"index": 0,
|
| 275 |
-
"delta": {"role": "assistant"
|
| 276 |
"finish_reason": None
|
| 277 |
}]
|
| 278 |
}
|
| 279 |
yield f"data: {json.dumps(initial_chunk)}\n\n"
|
| 280 |
-
|
| 281 |
last_message_id = None
|
| 282 |
-
|
| 283 |
for url in endpoints:
|
| 284 |
try:
|
| 285 |
print(f"Trying endpoint: {url}")
|
| 286 |
-
|
| 287 |
-
|
| 288 |
with requests.post(
|
| 289 |
-
url,
|
| 290 |
-
json=payload,
|
| 291 |
-
headers=get_headers(),
|
| 292 |
stream=True,
|
| 293 |
timeout=120
|
| 294 |
) as response:
|
| 295 |
-
|
| 296 |
print(f"Response status: {response.status_code}")
|
| 297 |
-
|
| 298 |
-
if response.status_code == 404:
|
| 299 |
-
continue # Try next endpoint
|
| 300 |
-
|
| 301 |
if response.status_code != 200:
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
yield generate_openai_stream_chunk(
|
| 305 |
-
f"Error {response.status_code}: {error_text}",
|
| 306 |
-
model, chunk_id
|
| 307 |
-
)
|
| 308 |
-
yield generate_openai_stream_chunk("", model, chunk_id, "stop")
|
| 309 |
-
yield "data: [DONE]\n\n"
|
| 310 |
-
return
|
| 311 |
-
|
| 312 |
buffer = ""
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
if content and packet_type in ['content', 'legacy', 'raw', 'error']:
|
| 347 |
-
yield generate_openai_stream_chunk(content, model, chunk_id)
|
| 348 |
-
|
| 349 |
-
# Update session with last message ID
|
| 350 |
-
if session_key in chat_sessions_cache and last_message_id:
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
break # Success, exit loop
|
| 354 |
-
|
| 355 |
-
except requests.exceptions.RequestException as e:
|
| 356 |
-
print(f"Request error for {url}: {e}")
|
| 357 |
continue
|
| 358 |
-
|
| 359 |
-
#
|
|
|
|
|
|
|
|
|
|
| 360 |
yield generate_openai_stream_chunk("", model, chunk_id, "stop")
|
| 361 |
yield "data: [DONE]\n\n"
|
| 362 |
-
if final_message_id:
|
| 363 |
-
chat_sessions_cache[session_key]['parent_message_id'] = final_message_id
|
| 364 |
-
|
| 365 |
|
| 366 |
def collect_full_response(payload, model, session_key):
|
| 367 |
"""Collect full streaming response and return as complete OpenAI response"""
|
|
|
|
def stream_onyx_response(payload, model, session_key):
    """Stream a chat response from the Onyx API as OpenAI-style SSE chunks.

    Args:
        payload: JSON body forwarded to the Onyx send-message endpoint.
        model: Model name echoed back in every OpenAI-format chunk.
        session_key: Key into ``chat_sessions_cache``; after the stream ends,
            the final assistant message id is stored there as
            ``parent_message_id`` so the next turn threads onto this reply.

    Yields:
        ``data: <json>`` SSE strings in OpenAI ``chat.completion.chunk``
        format, followed by a stop chunk and a final ``data: [DONE]`` line.
    """
    final_message_id = None  # set once Onyx signals a "stop" packet
    chunk_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

    # Try the primary endpoint first; older deployments only expose the
    # fallback path, so any non-200 response moves on to the next URL.
    endpoints = [
        f"{ONYX_BASE_URL}/api/chat/send-chat-message",  # Primary (new)
        f"{ONYX_BASE_URL}/api/chat/send-message",  # Fallback
    ]

    # Initial chunk announcing the assistant role, per the OpenAI
    # streaming convention.
    initial_chunk = {
        "id": chunk_id,
        "object": "chat.completion.chunk",
        # NOTE(review): the upstream file has one more field here (hidden
        # diff-context line 270, presumably the "created" timestamp) —
        # restore it from app.py when merging.
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {"role": "assistant"},
            "finish_reason": None
        }]
    }
    yield f"data: {json.dumps(initial_chunk)}\n\n"

    last_message_id = None

    for url in endpoints:
        try:
            print(f"Trying endpoint: {url}")

            with requests.post(
                url,
                json=payload,
                headers=get_headers(),
                stream=True,
                timeout=120
            ) as response:

                print(f"Response status: {response.status_code}")

                if response.status_code != 200:
                    continue  # try the next endpoint

                buffer = ""
                stopped = False  # set when Onyx signals end-of-answer

                for chunk in response.iter_content(decode_unicode=True):
                    if not chunk:
                        continue

                    buffer += chunk

                    # Onyx streams newline-delimited packets; keep any
                    # partial trailing line buffered until it completes.
                    while '\n' in buffer:
                        line, buffer = buffer.split('\n', 1)
                        line = line.strip()

                        if not line or line == "[DONE]":
                            continue

                        if line.startswith("data: "):
                            line = line[6:]

                        content, msg_id, packet_type = parse_onyx_stream_chunk(line)

                        if msg_id:
                            last_message_id = msg_id

                        if packet_type == "content" and content:
                            yield generate_openai_stream_chunk(content, model, chunk_id)

                        if packet_type == "stop":
                            final_message_id = last_message_id
                            stopped = True
                            break

                    # BUG FIX: the break above only left the inner line loop;
                    # without this guard the code kept reading iter_content()
                    # after the answer was already complete.
                    if stopped:
                        break

                break  # success → exit endpoint loop

        except Exception as e:
            # Best-effort endpoint fallback: a failure here moves on to the
            # next candidate URL instead of aborting the whole stream.
            print("Stream error:", e)
            continue

    # Persist the FINAL assistant message id so the session threads the next
    # user turn onto this reply.
    if final_message_id and session_key in chat_sessions_cache:
        chat_sessions_cache[session_key]["parent_message_id"] = final_message_id

    yield generate_openai_stream_chunk("", model, chunk_id, "stop")
    yield "data: [DONE]\n\n"
| 341 |
|
| 342 |
def collect_full_response(payload, model, session_key):
|
| 343 |
"""Collect full streaming response and return as complete OpenAI response"""
|