junaid17 committed on
Commit
e517ecf
·
verified ·
1 Parent(s): 23413f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import shutil
3
  from fastapi.responses import FileResponse
4
  import asyncio
 
5
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException, BackgroundTasks
6
  from fastapi.responses import StreamingResponse
7
  from pydantic import BaseModel
@@ -65,46 +66,53 @@ async def upload_document(
65
  raise HTTPException(status_code=500, detail=str(e))
66
 
67
 
68
- import json
69
 
70
  # ... (keep existing imports) ...
71
 
72
  @app.post("/chat")
73
  async def chat_endpoint(request: ChatRequest):
74
  """
75
- Chat endpoint that returns a STREAMING response in JSON-SSE format.
76
  """
77
-
78
  config = {"configurable": {"thread_id": request.thread_id}}
79
 
80
  inputs = {
81
  "query": request.query,
82
  "RAG": request.use_rag,
83
  "web_search": request.use_web,
84
- "model_name": request.model_name, # Ensure you passed this if you added multi-llm support
85
  "context": [],
86
  "metadata": [],
87
  "web_context": "",
88
  }
89
 
90
  async def event_generator():
91
- print(f"--- Starting stream for {request.thread_id} ---") # Log to HF console
92
 
 
93
  async for event in rag_app.astream_events(inputs, config=config, version="v1"):
94
- kind = event["event"]
95
 
96
- # Check for LLM token events
97
- if kind == "on_chat_model_stream":
98
- content = event["data"]["chunk"].content
 
 
 
 
 
 
 
 
 
99
 
100
- if content:
101
- # 1. Wrap content in a JSON object (Safer than raw text)
102
- chunk_data = json.dumps({"content": content})
103
-
104
- # 2. Yield the SSE frame
105
- yield f"data: {chunk_data}\n\n"
106
-
107
- # 3. Send a [DONE] signal so the frontend knows to stop
108
  yield "data: [DONE]\n\n"
109
 
110
  return StreamingResponse(
@@ -114,7 +122,7 @@ async def chat_endpoint(request: ChatRequest):
114
  "Cache-Control": "no-cache",
115
  "Connection": "keep-alive",
116
  "Content-Type": "text/event-stream",
117
- "X-Accel-Buffering": "no", # CRITICAL: Disables HF/Nginx buffering
118
  },
119
  )
120
 
 
2
  import shutil
3
  from fastapi.responses import FileResponse
4
  import asyncio
5
+ import json
6
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException, BackgroundTasks
7
  from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
 
66
  raise HTTPException(status_code=500, detail=str(e))
67
 
68
 
69
+
70
 
71
  # ... (keep existing imports) ...
72
 
73
  @app.post("/chat")
74
  async def chat_endpoint(request: ChatRequest):
75
  """
76
+ Robust Streaming Endpoint that logs events to console.
77
  """
 
78
  config = {"configurable": {"thread_id": request.thread_id}}
79
 
80
  inputs = {
81
  "query": request.query,
82
  "RAG": request.use_rag,
83
  "web_search": request.use_web,
84
+ "model_name": request.model_name,
85
  "context": [],
86
  "metadata": [],
87
  "web_context": "",
88
  }
89
 
90
async def event_generator():
    """SSE generator: forward LLM token chunks as JSON-wrapped `data:` frames,
    then emit a `[DONE]` sentinel so the frontend knows the stream has ended."""
    print(f"--- 🚀 Starting stream for {request.thread_id} ---")

    # Use 'v2' if you are on the latest LangGraph, but 'v1' is safer for compatibility.
    async for event in rag_app.astream_events(inputs, config=config, version="v1"):
        # We deliberately ignore the event name ("on_chat_model_stream", etc.)
        # and only ask: does this event carry a message chunk with text?
        # (Removed a dead `event_type = event.get("event")` assignment that was
        # never printed or used.)
        chunk = event.get("data", {}).get("chunk")

        # Standard LangChain message chunks expose a `.content` attribute.
        # Truthiness of `chunk.content` already excludes empty strings.
        if chunk and hasattr(chunk, "content") and chunk.content:
            # BUGFIX: the previous `content.strip() != ""` guard silently
            # dropped whitespace-only chunks (a lone " " or "\n" token),
            # gluing adjacent words together in the reassembled output.
            # Whitespace must be forwarded verbatim; JSON-encoding keeps the
            # SSE frame safe regardless of the chunk's characters.
            content = chunk.content
            chunk_json = json.dumps({"content": content})
            yield f"data: {chunk_json}\n\n"

    # End-of-stream sentinel consumed by the frontend reader.
    yield "data: [DONE]\n\n"
117
 
118
  return StreamingResponse(
 
122
  "Cache-Control": "no-cache",
123
  "Connection": "keep-alive",
124
  "Content-Type": "text/event-stream",
125
+ "X-Accel-Buffering": "no",
126
  },
127
  )
128