junaid17 committed on
Commit
b0c4dde
·
verified ·
1 Parent(s): e9d0cfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -43
app.py CHANGED
@@ -81,49 +81,40 @@ async def upload_document(
81
 
82
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Streaming chat endpoint.

    Runs the RAG graph and relays the LLM's token stream to the client as
    Server-Sent Events (SSE). Newlines inside a chunk are escaped as the
    two characters ``\\n`` so each SSE ``data:`` frame stays on one line.
    """
    run_config = {"configurable": {"thread_id": request.thread_id}}

    graph_inputs = {
        "query": request.query,
        "RAG": request.use_rag,
        "web_search": request.use_web,
        "model_name": request.model_name,
        "context": [],
        "metadata": [],
        "web_context": "",
    }

    async def sse_stream():
        # A ~2KB SSE comment frame up front forces intermediaries (e.g. Nginx)
        # to flush their buffer so the client starts receiving immediately.
        padding = " " * 2048
        yield f": {padding}\n\n"

        async for event in rag_app.astream_events(graph_inputs, config=run_config, version="v1"):
            # Only token-stream events carry text for the client.
            if event["event"] != "on_chat_model_stream":
                continue
            chunk_text = event["data"]["chunk"].content
            # Skip empty and whitespace-only chunks.
            if not chunk_text or chunk_text.strip() == "":
                continue
            data = chunk_text.replace("\n", "\\n")
            yield f"data: {data}\n\n"

    return StreamingResponse(
        sse_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Content-Encoding": "none",  # keep proxies from compressing (and thus buffering) the stream
            "X-Accel-Buffering": "no",
        },
    )
127
 
128
 
129
  # ---------------- STT ---------------- #
 
81
 
82
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Standard chat endpoint (non-streaming).

    Runs the RAG graph to completion and returns the full answer as JSON.

    Args:
        request: ChatRequest carrying the user query, feature flags
            (use_rag / use_web), the model name, and the conversation
            thread id.

    Returns:
        dict: ``{"response": <AI message text>, "thread_id": <echoed id>}``.

    Raises:
        HTTPException: 500 when the graph invocation fails; downstream
            HTTPExceptions are re-raised unchanged.
    """
    try:
        # 1. The thread id scopes conversation state inside the graph's checkpointer.
        config = {"configurable": {"thread_id": request.thread_id}}

        inputs = {
            "query": request.query,
            "RAG": request.use_rag,
            "web_search": request.use_web,
            "model_name": request.model_name,
            "context": [],
            "metadata": [],
            "web_context": "",
        }

        # 2. ainvoke keeps the event loop free while the graph runs to completion.
        result = await rag_app.ainvoke(inputs, config=config)

        # 3. The last entry of 'response' is the final AI message.
        last_message = result["response"][-1]

        # 4. Return standard JSON.
        return {
            "response": last_message.content,
            "thread_id": request.thread_id,
        }
    except HTTPException:
        # Don't mask a deliberate HTTP error from downstream as a generic 500.
        raise
    except Exception as e:
        print(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
 
 
 
 
 
 
 
 
 
118
 
119
 
120
  # ---------------- STT ---------------- #