Spaces:

junaid17
/

cortex

Sleeping

App Files Files Community

junaid17 committed on Jan 9

Commit

9554ad4

verified ·

1 Parent(s): 21ca625

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -19

app.py CHANGED Viewed

@@ -79,17 +79,16 @@ async def upload_document(
 # ... (keep existing imports) ...
 @app.post("/chat/stream")
 async def chat_stream_endpoint(request: ChatRequest):
     """
     Streaming Chat Endpoint.
-    Streams tokens/chunks as they are generated by LangGraph.
     """
     async def event_generator():
         try:
             config = {"configurable": {"thread_id": request.thread_id}}
             inputs = {
                 "query": request.query,
                 "RAG": request.use_rag,
@@ -99,31 +98,125 @@ async def chat_stream_endpoint(request: ChatRequest):
                 "metadata": [],
                 "web_context": "",
             }
-            async for event in rag_app.astream(inputs, config=config, stream_mode="values"):
                 if "response" in event:
-                    msg = event["response"][-1]
-                    if hasattr(msg, "content") and msg.content:
                         chunk = {
-                            "type": "chunk",
-                            "content": msg.content
                         }
-                        yield json.dumps(chunk) + "\n"
-            # signal end of stream
-            yield json.dumps({"type": "done"}) + "\n"
         except Exception as e:
-            error_chunk = {"type": "error", "message": str(e)}
-            yield json.dumps(error_chunk) + "\n"
     return StreamingResponse(
         event_generator(),
-        media_type="text/plain"
     )
 # ---------------- STT ---------------- #
 @app.post("/stt")
 async def transcribe_audio(file: UploadFile = File(...)):

 # ... (keep existing imports) ...
+# NEW: Streaming endpoint
 @app.post("/chat/stream")
 async def chat_stream_endpoint(request: ChatRequest):
     """
     Streaming Chat Endpoint.
+    Streams the LLM response as Server-Sent Events (SSE).
     """
     async def event_generator():
         try:
             config = {"configurable": {"thread_id": request.thread_id}}
             inputs = {
                 "query": request.query,
                 "RAG": request.use_rag,
                 "metadata": [],
                 "web_context": "",
             }
+            # Use astream or astream_events depending on your LangGraph version
+            async for event in rag_app.astream(inputs, config=config):
+                # Extract the content from the streaming event
+                # The structure depends on your graph; adjust as needed.
                 if "response" in event:
+                    messages = event["response"]
+                    if messages and len(messages) > 0:
+                        last_msg = messages[-1]
+                        if hasattr(last_msg, 'content'):
+                            chunk = {
+                                "type": "content",
+                                "data": last_msg.content,
+                                "thread_id": request.thread_id
+                            }
+                            yield f"data: {json.dumps(chunk)}\n\n"
+                # If your graph streams token by token, handle it here
+                elif "chunk" in event:
+                    chunk = {
+                        "type": "token",
+                        "data": event["chunk"],
+                        "thread_id": request.thread_id
+                    }
+                    yield f"data: {json.dumps(chunk)}\n\n"
+            # Send completion signal
+            yield f"data: {json.dumps({'type': 'done', 'thread_id': request.thread_id})}\n\n"
+        except Exception as e:
+            error_data = {
+                "type": "error",
+                "error": str(e),
+                "thread_id": request.thread_id
+            }
+            yield f"data: {json.dumps(error_data)}\n\n"
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no"  # Disable nginx buffering
+        }
+    )
+# ALTERNATIVE: If you need more granular streaming with astream_events
+@app.post("/chat/stream/events")
+async def chat_stream_events_endpoint(request: ChatRequest):
+    """
+    Streaming Chat Endpoint using astream_events.
+    Provides more granular control over streaming events.
+    """
+    async def event_generator():
+        try:
+            config = {"configurable": {"thread_id": request.thread_id}}
+            inputs = {
+                "query": request.query,
+                "RAG": request.use_rag,
+                "web_search": request.use_web,
+                "model_name": request.model_name,
+                "context": [],
+                "metadata": [],
+                "web_context": "",
+            }
+            # Stream events from the graph
+            async for event in rag_app.astream_events(inputs, config=config, version="v2"):
+                event_type = event.get("event")
+                # Handle different event types
+                if event_type == "on_chat_model_stream":
+                    # This captures token-by-token streaming from the LLM
+                    content = event.get("data", {}).get("chunk", {})
+                    if hasattr(content, 'content') and content.content:
                         chunk = {
+                            "type": "token",
+                            "data": content.content,
+                            "thread_id": request.thread_id
                         }
+                        yield f"data: {json.dumps(chunk)}\n\n"
+                elif event_type == "on_chain_end":
+                    # Final result
+                    output = event.get("data", {}).get("output", {})
+                    if "response" in output:
+                        messages = output["response"]
+                        if messages and len(messages) > 0:
+                            last_msg = messages[-1]
+                            chunk = {
+                                "type": "complete",
+                                "data": last_msg.content if hasattr(last_msg, 'content') else str(last_msg),
+                                "thread_id": request.thread_id
+                            }
+                            yield f"data: {json.dumps(chunk)}\n\n"
+            # Send completion signal
+            yield f"data: {json.dumps({'type': 'done', 'thread_id': request.thread_id})}\n\n"
         except Exception as e:
+            error_data = {
+                "type": "error",
+                "error": str(e),
+                "thread_id": request.thread_id
+            }
+            yield f"data: {json.dumps(error_data)}\n\n"
     return StreamingResponse(
         event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no"
+        }
     )
 # ---------------- STT ---------------- #
 @app.post("/stt")
 async def transcribe_audio(file: UploadFile = File(...)):