junaid17 committed on
Commit
648fa9d
·
verified ·
1 Parent(s): f20cc14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -45
app.py CHANGED
@@ -68,52 +68,39 @@ async def upload_document(
68
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Stream the model's answer over Server-Sent Events (SSE).

    Runs the LangGraph pipeline with the request's flags and relays each
    LLM token chunk to the client as a `data:` event.
    """
    # Per-conversation checkpoint key for LangGraph.
    config = {"configurable": {"thread_id": request.thread_id}}

    # Initial graph state built from the request; context fields start empty.
    inputs = {
        "query": request.query,
        "RAG": request.use_rag,
        "web_search": request.use_web,
        "model_name": request.model_name,
        "context": [],
        "metadata": [],
        "web_context": "",
    }

    async def event_generator():
        # Walk the graph's event stream and forward only LLM token chunks.
        async for event in rag_app.astream_events(inputs, config=config, version="v1"):
            if event["event"] != "on_chat_model_stream":
                continue
            chunk = event["data"]["chunk"]
            if not (chunk and hasattr(chunk, "content")):
                continue
            content = chunk.content
            if not content:
                continue
            # Escape real newlines so each SSE frame stays a single data line.
            data = str(content).replace("\n", "\\n")
            yield f"data: {data}\n\n"

    # SSE response; headers disable proxy buffering so tokens flush promptly.
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # Critical for Hugging Face
            "Connection": "keep-alive",  # Added for extra safety
        },
    )
117
 
118
 
119
  # ---------------- STT ---------------- #
 
68
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Standard chat endpoint (non-streaming).

    Runs the LangGraph pipeline to completion and returns the full answer
    as JSON: {"response": <text>, "thread_id": <id>}.

    Raises:
        HTTPException: 500 with the underlying error message if the graph
            invocation or response extraction fails.
    """
    try:
        # Per-conversation checkpoint key for LangGraph.
        config = {"configurable": {"thread_id": request.thread_id}}

        # Initial graph state from the request; context fields start empty.
        inputs = {
            "query": request.query,
            "RAG": request.use_rag,
            "web_search": request.use_web,
            "model_name": request.model_name,
            "context": [],
            "metadata": [],
            "web_context": "",
        }

        # ainvoke keeps FastAPI's event loop unblocked while the graph runs.
        result = await rag_app.ainvoke(inputs, config=config)

        # The final AI message is the last entry of the 'response' list.
        # NOTE(review): assumes the graph state exposes a 'response' list of
        # message objects — confirm against the graph definition.
        last_message = result['response'][-1]

        return {
            "response": last_message.content,
            "thread_id": request.thread_id
        }

    except Exception as e:
        # Fixed log-message typo ("generation" -> "generating").
        print(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
 
106
  # ---------------- STT ---------------- #