Spaces:

junaid17
/

cortex

Running

App Files Files Community

junaid17 committed on Jan 9

Commit

21ca625

verified ·

1 Parent(s): b0c4dde

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -33

app.py CHANGED Viewed

@@ -79,42 +79,49 @@ async def upload_document(
 # ... (keep existing imports) ...
-@app.post("/chat")
-async def chat_endpoint(request: ChatRequest):
     """
-    Standard Chat Endpoint (Non-Streaming).
-    Waits for the LLM to finish and returns the full JSON response.
     """
-    try:
-        # 1. Setup Config & Inputs
-        config = {"configurable": {"thread_id": request.thread_id}}
-        inputs = {
-            "query": request.query,
-            "RAG": request.use_rag,
-            "web_search": request.use_web,
-            "model_name": request.model_name,
-            "context": [],
-            "metadata": [],
-            "web_context": "",
-        }
-        # 2. Invoke the Graph (Waits for completion)
-        # using ainvoke is better for FastAPI to prevent blocking the server
-        result = await rag_app.ainvoke(inputs, config=config)
-        # 3. Extract the last message (AI Response)
-        last_message = result['response'][-1]
-        # 4. Return standard JSON
-        return {
-            "response": last_message.content,
-            "thread_id": request.thread_id
-        }
-    except Exception as e:
-        print(f"Error generation response: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 # ---------------- STT ---------------- #

 # ... (keep existing imports) ...
+@app.post("/chat/stream")
+async def chat_stream_endpoint(request: ChatRequest):
     """
+    Streaming Chat Endpoint.
+    Streams tokens/chunks as they are generated by LangGraph.
     """
+    async def event_generator():
+        try:
+            config = {"configurable": {"thread_id": request.thread_id}}
+            inputs = {
+                "query": request.query,
+                "RAG": request.use_rag,
+                "web_search": request.use_web,
+                "model_name": request.model_name,
+                "context": [],
+                "metadata": [],
+                "web_context": "",
+            }
+            async for event in rag_app.astream(inputs, config=config, stream_mode="values"):
+                if "response" in event:
+                    msg = event["response"][-1]
+                    if hasattr(msg, "content") and msg.content:
+                        chunk = {
+                            "type": "chunk",
+                            "content": msg.content
+                        }
+                        yield json.dumps(chunk) + "\n"
+            # signal end of stream
+            yield json.dumps({"type": "done"}) + "\n"
+        except Exception as e:
+            error_chunk = {"type": "error", "message": str(e)}
+            yield json.dumps(error_chunk) + "\n"
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/plain"
+    )
 # ---------------- STT ---------------- #