junaid17 committed on
Commit
648fa9d
·
verified ·
1 Parent(s): f20cc14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -45
app.py CHANGED
@@ -68,52 +68,39 @@ async def upload_document(
68
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Stream the model's answer over Server-Sent Events (SSE).

    Runs the LangGraph pipeline with the request's flags and relays each
    LLM token chunk to the client as a `data:` event.
    """
    # Per-conversation checkpoint key for LangGraph.
    config = {"configurable": {"thread_id": request.thread_id}}

    # Initial graph state built from the request; context fields start empty.
    inputs = {
        "query": request.query,
        "RAG": request.use_rag,
        "web_search": request.use_web,
        "model_name": request.model_name,
        "context": [],
        "metadata": [],
        "web_context": "",
    }

    async def event_generator():
        # Walk the graph's event stream and forward only LLM token chunks.
        async for event in rag_app.astream_events(inputs, config=config, version="v1"):
            if event["event"] != "on_chat_model_stream":
                continue
            chunk = event["data"]["chunk"]
            if not (chunk and hasattr(chunk, "content")):
                continue
            content = chunk.content
            if not content:
                continue
            # Escape real newlines so each SSE frame stays a single data line.
            data = str(content).replace("\n", "\\n")
            yield f"data: {data}\n\n"

    # SSE response; headers disable proxy buffering so tokens flush promptly.
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # Critical for Hugging Face
            "Connection": "keep-alive",  # Added for extra safety
        },
    )
117
 
118
 
119
  # ---------------- STT ---------------- #
 
68
@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """
    Standard chat endpoint (non-streaming).

    Runs the LangGraph pipeline to completion and returns the full answer
    as JSON: {"response": <text>, "thread_id": <id>}.

    Raises:
        HTTPException: 500 with the underlying error message if the graph
            invocation or response extraction fails.
    """
    try:
        # Per-conversation checkpoint key for LangGraph.
        config = {"configurable": {"thread_id": request.thread_id}}

        # Initial graph state from the request; context fields start empty.
        inputs = {
            "query": request.query,
            "RAG": request.use_rag,
            "web_search": request.use_web,
            "model_name": request.model_name,
            "context": [],
            "metadata": [],
            "web_context": "",
        }

        # ainvoke keeps FastAPI's event loop unblocked while the graph runs.
        result = await rag_app.ainvoke(inputs, config=config)

        # The final AI message is the last entry of the 'response' list.
        # NOTE(review): assumes the graph state exposes a 'response' list of
        # message objects — confirm against the graph definition.
        last_message = result['response'][-1]

        return {
            "response": last_message.content,
            "thread_id": request.thread_id
        }

    except Exception as e:
        # Fixed log-message typo ("generation" -> "generating").
        print(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
 
106
  # ---------------- STT ---------------- #