junaid17 committed on
Commit
b0c4dde
·
verified ·
1 Parent(s): e9d0cfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -43
app.py CHANGED
@@ -81,49 +81,40 @@ async def upload_document(
81
 
82
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Streaming chat endpoint.

    Runs the RAG graph and relays the LLM's token stream to the client as
    Server-Sent Events (SSE). Newlines inside a chunk are escaped as the
    two characters ``\\n`` so each SSE ``data:`` frame stays on one line.
    """
    run_config = {"configurable": {"thread_id": request.thread_id}}

    graph_inputs = {
        "query": request.query,
        "RAG": request.use_rag,
        "web_search": request.use_web,
        "model_name": request.model_name,
        "context": [],
        "metadata": [],
        "web_context": "",
    }

    async def sse_stream():
        # A ~2KB SSE comment frame up front forces intermediaries (e.g. Nginx)
        # to flush their buffer so the client starts receiving immediately.
        padding = " " * 2048
        yield f": {padding}\n\n"

        async for event in rag_app.astream_events(graph_inputs, config=run_config, version="v1"):
            # Only token-stream events carry text for the client.
            if event["event"] != "on_chat_model_stream":
                continue
            chunk_text = event["data"]["chunk"].content
            # Skip empty and whitespace-only chunks.
            if not chunk_text or chunk_text.strip() == "":
                continue
            data = chunk_text.replace("\n", "\\n")
            yield f"data: {data}\n\n"

    return StreamingResponse(
        sse_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Encoding": "none",  # keep proxies from compressing (and thus buffering) the stream
            "X-Accel-Buffering": "no",
        },
    )
127
 
128
 
129
  # ---------------- STT ---------------- #
 
81
 
82
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Standard chat endpoint (non-streaming).

    Runs the RAG graph to completion and returns the full answer as JSON.

    Args:
        request: ChatRequest carrying the user query, feature flags
            (use_rag / use_web), the model name, and the conversation
            thread id.

    Returns:
        dict: ``{"response": <AI message text>, "thread_id": <echoed id>}``.

    Raises:
        HTTPException: 500 when the graph invocation fails; downstream
            HTTPExceptions are re-raised unchanged.
    """
    try:
        # 1. The thread id scopes conversation state inside the graph's checkpointer.
        config = {"configurable": {"thread_id": request.thread_id}}

        inputs = {
            "query": request.query,
            "RAG": request.use_rag,
            "web_search": request.use_web,
            "model_name": request.model_name,
            "context": [],
            "metadata": [],
            "web_context": "",
        }

        # 2. ainvoke keeps the event loop free while the graph runs to completion.
        result = await rag_app.ainvoke(inputs, config=config)

        # 3. The last entry of 'response' is the final AI message.
        last_message = result["response"][-1]

        # 4. Return standard JSON.
        return {
            "response": last_message.content,
            "thread_id": request.thread_id,
        }
    except HTTPException:
        # Don't mask a deliberate HTTP error from downstream as a generic 500.
        raise
    except Exception as e:
        print(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
 
 
 
 
 
 
 
 
 
118
 
119
 
120
  # ---------------- STT ---------------- #