Kalpokoch committed on
Commit 1267728 · verified · 1 Parent(s): 01687a9

Update app/app.py

Files changed (1)
  1. app/app.py +102 -294
app/app.py CHANGED
@@ -7,6 +7,7 @@ import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
+# Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated

 # -----------------------------
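The added comment points at why the import is package-qualified: `app/app.py` has to be imported as `app.app` for `from app.policy_vector_db import ...` to resolve. A minimal launcher sketch, assuming the repo root holds the `app/` package; the module string and port are assumptions, not stated in this commit:

```python
# Hypothetical launcher (run from the repo root). Assumes app/ is a package
# containing app.py and policy_vector_db.py; "app.app:app" names the FastAPI
# instance created in this file. Port 7860 is only a placeholder.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("app.app:app", host="0.0.0.0", port=7860)
```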
@@ -21,25 +22,20 @@ class RequestIdAdapter(logging.LoggerAdapter):
 logger = logging.getLogger("app")

 # -----------------------------
-# ✅ Configuration - Restored Original Efficient Settings
+# ✅ Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))  # Back to original timeout
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
-TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))  # Keep reduced for efficiency
-TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))  # Keep reduced for efficiency
-
-# ✅ Single request processing without blocking semaphore
-MAX_CONCURRENT_REQUESTS = 1
-request_in_progress = False
-request_lock = asyncio.Lock()
+TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
+TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))

 # -----------------------------
 # ✅ Initialize FastAPI App
 # -----------------------------
-app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.5.0")
+app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")

 @app.middleware("http")
 async def add_request_id(request: Request, call_next):
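This hunk deletes the reject-if-busy guard (a `request_in_progress` flag behind an `asyncio.Lock`), along with `MAX_CONCURRENT_REQUESTS`, which is apparently unused in the code shown. For reference, the removed pattern in isolation; a minimal sketch, assuming a single-worker event loop (`/busy-demo` and the sleep are illustrative stand-ins):

```python
# Sketch of the reject-if-busy pattern removed in this commit: a flag guarded
# by an asyncio.Lock so concurrent calls get HTTP 429 instead of queueing
# behind one long CPU-bound generation.
import asyncio
from fastapi import FastAPI, HTTPException

app = FastAPI()
request_in_progress = False
request_lock = asyncio.Lock()

@app.post("/busy-demo")
async def busy_demo():
    global request_in_progress
    async with request_lock:            # serialize access to the flag
        if request_in_progress:
            raise HTTPException(status_code=429, detail="Server is busy.")
        request_in_progress = True
    try:
        await asyncio.sleep(5)          # stand-in for LLM generation
        return {"answer": "done"}
    finally:
        async with request_lock:        # always release the flag
            request_in_progress = False
```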
@@ -50,7 +46,7 @@ async def add_request_id(request: Request, call_next):
     return response

 # -----------------------------
-# ✅ Vector DB Initialization
+# ✅ Vector DB and Data Initialization
 # -----------------------------
 logger.info("Initializing vector DB...")
 try:
@@ -71,16 +67,16 @@ except Exception as e:
     db_ready = False

 # -----------------------------
-# ✅ Load GGUF Model - Restored Original Efficient Settings
+# ✅ Load TinyLlama GGUF Model
 # -----------------------------
 logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
-        n_ctx=4096,  # ✅ Restored original context size
-        n_threads=4,  # ✅ Restored original thread count for efficient CPU usage
-        n_batch=512,  # ✅ Restored original batch size
-        use_mlock=True,  # ✅ Restored original memory settings
+        n_ctx=4096,
+        n_threads=4,
+        n_batch=512,
+        use_mlock=True,
         verbose=False
     )
     logger.info("GGUF model loaded successfully.")
@@ -105,261 +101,84 @@ class Feedback(BaseModel):
     comment: str | None = None

 # -----------------------------
-# ✅ Enhanced Query Processing Functions
-# -----------------------------
-def classify_query_type(question: str) -> str:
-    """Classify the type of query to choose appropriate search strategy."""
-    question_lower = question.lower()
-
-    if re.search(r'₹|crore|lakh|\d+.*approve|limit.*\d+', question_lower):
-        return "monetary"
-
-    if any(word in question_lower for word in ["who can", "who approve", "authority", "delegation"]):
-        return "authority"
-
-    if any(word in question_lower for word in ["how to", "procedure", "process", "steps", "requirement"]):
-        return "procedure"
-
-    if re.search(r'section|annexure|clause', question_lower):
-        return "section_specific"
-
-    return "general"
-
-def extract_monetary_amount(question: str) -> float:
-    """Extract monetary amount from question for specialized search."""
-    patterns = [
-        r'₹\s*(\d+(?:,\d+)*(?:\.\d+)?)\s*crore',
-        r'(\d+(?:,\d+)*(?:\.\d+)?)\s*crore',
-        r'₹\s*(\d+(?:,\d+)*(?:\.\d+)?)\s*lakh',
-        r'(\d+(?:,\d+)*(?:\.\d+)?)\s*lakh',
-        r'₹\s*(\d+(?:,\d+)*(?:\.\d+)?)'
-    ]
-
-    for pattern in patterns:
-        match = re.search(pattern, question, re.IGNORECASE)
-        if match:
-            amount = float(match.group(1).replace(',', ''))
-            if 'crore' in pattern:
-                return amount * 1e7
-            elif 'lakh' in pattern:
-                return amount * 1e5
-            else:
-                return amount
-    return None
-
-def build_enhanced_prompt(question: str, context: str, query_type: str, search_results: list) -> str:
-    """Build context-aware prompt based on query type and metadata."""
-
-    roles_mentioned = set()
-    sections_mentioned = set()
-
-    for result in search_results:
-        metadata = result.get('metadata', {})
-        if 'role' in metadata:
-            roles_mentioned.add(metadata['role'])
-        if 'section' in metadata:
-            sections_mentioned.add(metadata['section'])
-
-    type_instructions = {
-        "monetary": "Focus on monetary limits, delegation amounts, and approval authorities for the specified amount.",
-        "authority": "Clearly identify the specific roles/positions and their delegation limits.",
-        "procedure": "Provide step-by-step procedures and requirements in a logical order.",
-        "section_specific": "Reference the specific sections, clauses, and policy provisions mentioned.",
-        "general": "Provide comprehensive information based on the policy context."
-    }
-
-    instruction = type_instructions.get(query_type, type_instructions["general"])
-
-    metadata_context = ""
-    if roles_mentioned:
-        metadata_context += f"\nRoles involved: {', '.join(roles_mentioned)}"
-    if sections_mentioned:
-        metadata_context += f"\nSections referenced: {', '.join(sections_mentioned)}"
-
-    prompt = f"""<|system|>
-You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
-Your task is to answer the user's question based ONLY on the provided context.
-
-**Query Type**: {query_type}
-**Specific Instructions**: {instruction}
-
-**Formatting Rules**:
-- For lists or multiple items: Separate each item with a pipe symbol (|)
-- For monetary amounts: Always specify the exact amount and currency
-- For authorities: Always specify the exact role/position and their limits
-- If information is not in context: Reply with "The provided policy context does not contain information on this topic."
-
-{metadata_context}
-</s>
-<|user|>
-### Relevant Policy Context:
-{context}
-
-### Question:
-{question}
-</s>
-<|assistant|>
-### Answer:
-"""
-    return prompt
-
-# -----------------------------
-# ✅ Efficient LLM Response Generation - Restored Original Async Pattern
-# -----------------------------
-async def generate_llm_response(prompt: str, request_id: str):
-    """Async LLM generation using original efficient pattern."""
-    loop = asyncio.get_running_loop()
-
-    def llm_call():
-        return llm(
-            prompt,
-            max_tokens=2048,  # ✅ Restored original token limit
-            stop=["###", "Question:", "Context:", "</s>"],
-            temperature=0.05,  # ✅ Restored original temperature
-            echo=False
-        )
-
-    # ✅ Use original async executor pattern for efficient CPU usage
-    response = await loop.run_in_executor(None, llm_call)
-
-    if response and "choices" in response and len(response["choices"]) > 0:
-        answer = response["choices"][0]["text"].strip()
-        if not answer:
-            raise ValueError("Empty response from LLM")
-        return answer
-    else:
-        raise ValueError("Invalid response from LLM")
-
-# -----------------------------
-# ✅ Endpoints with Lightweight Request Management
+# ✅ Endpoints
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})

 @app.get("/")
 async def root():
-    return {
-        "status": "✅ Server is running efficiently",
-        "mode": "CPU optimized for Hugging Face",
-        "model_loaded": model_ready,
-        "db_ready": db_ready
-    }
+    return {"status": "✅ Server is running."}

 @app.get("/health")
 async def health_check():
     status = {
         "status": "ok",
         "database_status": "ready" if db_ready else "error",
-        "model_status": "ready" if model_ready else "error",
-        "processing_mode": "efficient_cpu_usage"
+        "model_status": "ready" if model_ready else "error"
     }
     if not db_ready or not model_ready:
         raise HTTPException(status_code=503, detail=status)
     return status

+async def generate_llm_response(prompt: str, request_id: str):
+    loop = asyncio.get_running_loop()
+    response = await loop.run_in_executor(
+        None,
+        lambda: llm(prompt, max_tokens=2048, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
+    )
+    answer = response["choices"][0]["text"].strip()
+    if not answer:
+        raise ValueError("Empty response from LLM")
+    return answer
+
 @app.post("/chat")
 async def chat(query: Query, request: Request):
-    global request_in_progress
-
-    # ✅ Lightweight request management - reject if busy instead of blocking
-    async with request_lock:
-        if request_in_progress:
-            raise HTTPException(status_code=429, detail="Server is busy processing another request. Please try again in a moment.")
-        request_in_progress = True
-
-    try:
-        adapter = get_logger_adapter(request)
-        question_lower = query.question.strip().lower()
-
-        # --- GREETING & INTRO HANDLING ---
-        greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
-        if question_lower in greeting_keywords:
-            adapter.info(f"Handling a greeting or introductory query: '{query.question}'")
-            intro_message = (
-                "Hello! I am an AI assistant specifically trained on NEEPCO's Delegation of Powers (DoP) policy document. "
-                "I can help you find accurate information about approval authorities, monetary limits, procedures, and policy requirements. "
-                "How can I assist you with the DoP policy today?"
-            )
-            return {
-                "request_id": getattr(request.state, 'request_id', 'N/A'),
-                "question": query.question,
-                "context_used": "NA - Greeting",
-                "answer": intro_message
-            }
-
-        if not db_ready or not model_ready:
-            adapter.error("Service unavailable due to initialization failure.")
-            raise HTTPException(status_code=503, detail="Service is not ready. Please check logs.")
-
-        adapter.info(f"Received query: '{query.question}'")
-
-        # Query classification and search
-        query_type = classify_query_type(query.question)
-        adapter.info(f"Query classified as: {query_type}")
-
-        search_results = []
-
-        # Enhanced search strategy
-        if query_type == "monetary":
-            amount = extract_monetary_amount(query.question)
-            if amount:
-                adapter.info(f"Extracted monetary amount: ₹{amount}")
-                try:
-                    monetary_results = db.search_by_amount(amount, comparison=">=", top_k=TOP_K_SEARCH)
-                    if monetary_results:
-                        search_results = monetary_results
-                        adapter.info(f"Found {len(search_results)} results using monetary search")
-                except:
-                    adapter.info("Monetary search not available, falling back to semantic search")
-
-        if not search_results:
-            # Use enhanced search if available, otherwise fallback to basic search
-            try:
-                search_results = db.search_with_context(
-                    query.question,
-                    top_k=TOP_K_SEARCH,
-                    include_related=True
-                )
-                adapter.info(f"Found {len(search_results)} results using enhanced semantic search")
-            except:
-                # Fallback to basic search
-                search_results = db.search(query.question, top_k=TOP_K_SEARCH)
-                adapter.info(f"Found {len(search_results)} results using basic search")
-
-        if not search_results:
-            adapter.warning("No relevant context found in vector DB.")
-            return {
-                "request_id": getattr(request.state, 'request_id', 'N/A'),
-                "question": query.question,
-                "context_used": "No relevant context found.",
-                "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing or ask about specific delegation limits, approval authorities, or procedures."
-            }
-
-        # Log search results
-        scores = [f"{result.get('relevance_score', 0):.4f}" for result in search_results]
-        adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
-
-        # Prepare context with metadata if available
-        context_chunks = []
-        for result in search_results[:TOP_K_CONTEXT]:
-            chunk_text = result['text']
-            metadata = result.get('metadata', {})
-
-            if metadata and (metadata.get('section') or metadata.get('role')):
-                metadata_prefix = f"[Section: {metadata.get('section', 'N/A')}, Role: {metadata.get('role', 'N/A')}] "
-                chunk_text = metadata_prefix + chunk_text
-
-            context_chunks.append(chunk_text)
-
-        context = "\n---\n".join(context_chunks)
-
-        # Build prompt - use enhanced if search results have metadata, otherwise simple
-        if any(result.get('metadata') for result in search_results):
-            prompt = build_enhanced_prompt(query.question, context, query_type, search_results)
-            adapter.info(f"Using enhanced prompt for {query_type} query")
-        else:
-            # Fallback to original simple prompt
-            prompt = f"""<|system|>
+    adapter = get_logger_adapter(request)
+    question_lower = query.question.strip().lower()
+
+    # --- GREETING & INTRO HANDLING ---
+    greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
+    if question_lower in greeting_keywords:
+        adapter.info(f"Handling a greeting or introductory query: '{query.question}'")
+        intro_message = (
+            "Hello! I am an AI assistant specifically trained on NEEPCO's Delegation of Powers (DoP) policy document. "
+            "My purpose is to help you find accurate information and answer questions based on this specific dataset. "
+            "I am currently running on a CPU-based environment. How can I assist you with the DoP policy today?"
+        )
+        return {
+            "request_id": getattr(request.state, 'request_id', 'N/A'),
+            "question": query.question,
+            "context_used": "NA - Greeting",
+            "answer": intro_message
+        }
+
+    if not db_ready or not model_ready:
+        adapter.error("Service unavailable due to initialization failure.")
+        raise HTTPException(status_code=503, detail="Service is not ready. Please check logs.")
+
+    adapter.info(f"Received query: '{query.question}'")
+
+    # 1. Search Vector DB
+    search_results = db.search(query.question, top_k=TOP_K_SEARCH)
+
+    if not search_results:
+        adapter.warning("No relevant context found in vector DB.")
+        return {
+            "question": query.question,
+            "context_used": "No relevant context found.",
+            "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
+        }
+
+    scores = [f"{result['relevance_score']:.4f}" for result in search_results]
+    adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
+
+    # 2. Prepare Context
+    context_chunks = [result['text'] for result in search_results[:TOP_K_CONTEXT]]
+    context = "\n---\n".join(context_chunks)
+
+    # 3. Build Prompt with Separator Instruction
+    prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
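Among the code removed above is the currency-normalisation helper, which mapped Indian units onto absolute rupees (1 lakh = 10^5, 1 crore = 10^7). A trimmed, runnable copy for reference; the example inputs are hypothetical:

```python
import re

def extract_monetary_amount(question: str):
    """Trimmed copy of the removed helper, for illustration only."""
    patterns = [
        r'₹\s*(\d+(?:,\d+)*(?:\.\d+)?)\s*crore',
        r'(\d+(?:,\d+)*(?:\.\d+)?)\s*crore',
        r'₹\s*(\d+(?:,\d+)*(?:\.\d+)?)\s*lakh',
        r'(\d+(?:,\d+)*(?:\.\d+)?)\s*lakh',
        r'₹\s*(\d+(?:,\d+)*(?:\.\d+)?)',
    ]
    for pattern in patterns:
        match = re.search(pattern, question, re.IGNORECASE)
        if match:
            amount = float(match.group(1).replace(',', ''))
            if 'crore' in pattern:
                return amount * 1e7  # 1 crore = 10,000,000
            if 'lakh' in pattern:
                return amount * 1e5  # 1 lakh = 100,000
            return amount
    return None

assert extract_monetary_amount("Who approves ₹2.5 crore works?") == 2.5e7
assert extract_monetary_amount("limit of 50 lakh") == 5e6
```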
365
 
@@ -378,52 +197,45 @@ Your task is to answer the user's question based ONLY on the provided context.
 <|assistant|>
 ### Detailed Answer:
 """
-            adapter.info("Using original simple prompt")

-        # Generate response using original efficient async pattern
-        answer = "An error occurred while processing your request."
-        try:
-            adapter.info("Sending prompt to LLM for generation...")
-            raw_answer = await asyncio.wait_for(
-                generate_llm_response(prompt, request.state.request_id),
-                timeout=LLM_TIMEOUT_SECONDS
-            )
-            adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
-
-            # Post-processing logic
-            if '|' in raw_answer:
-                adapter.info("Pipe separator found. Formatting response as a bulleted list.")
-                items = raw_answer.split('|')
-                cleaned_items = [f"• {item.strip()}" for item in items if item.strip()]
-                answer = "\n".join(cleaned_items)
-            else:
-                answer = raw_answer.strip()
-
-            # Add monetary context if needed
-            if query_type == "monetary" and "₹" not in answer and extract_monetary_amount(query.question):
-                amount = extract_monetary_amount(query.question)
-                answer = f"For amounts of ₹{amount:,.0f}:\n\n{answer}"
-
-        except asyncio.TimeoutError:
-            adapter.warning(f"LLM generation timed out after {LLM_TIMEOUT_SECONDS} seconds.")
-            answer = "Sorry, the request took too long to process. Please try again with a simpler question."
-        except Exception as e:
-            adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
-            answer = "Sorry, an unexpected error occurred while generating a response."
-
-        adapter.info(f"Final answer prepared for {query_type} query. Returning to client.")
-        return {
-            "request_id": request.state.request_id,
-            "question": query.question,
-            "context_used": context,
-            "answer": answer,
-            "query_type": query_type if 'query_type' in locals() else "general"
-        }
-
-    finally:
-        # Always release the lock
-        async with request_lock:
-            request_in_progress = False
+    # 4. Generate Response
+    answer = "An error occurred while processing your request."
+    try:
+        adapter.info("Sending prompt to LLM for generation...")
+        raw_answer = await asyncio.wait_for(
+            generate_llm_response(prompt, request.state.request_id),
+            timeout=LLM_TIMEOUT_SECONDS
+        )
+        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
+
+        # --- POST-PROCESSING LOGIC ---
+        # Check if the model used the pipe separator, indicating a list.
+        if '|' in raw_answer:
+            adapter.info("Pipe separator found. Formatting response as a bulleted list.")
+            # Split the string into a list of items
+            items = raw_answer.split('|')
+            # Clean up each item and format it as a bullet point
+            cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
+            # Join them back together with newlines
+            answer = "\n".join(cleaned_items)
+        else:
+            # If no separator, use the answer as is.
+            answer = raw_answer

+    except asyncio.TimeoutError:
+        adapter.warning(f"LLM generation timed out after {LLM_TIMEOUT_SECONDS} seconds.")
+        answer = "Sorry, the request took too long to process. Please try again with a simpler question."
+    except Exception as e:
+        adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
+        answer = "Sorry, an unexpected error occurred while generating a response."

+    adapter.info(f"Final answer prepared. Returning to client.")
+    return {
+        "request_id": request.state.request_id,
+        "question": query.question,
+        "context_used": context,
+        "answer": answer
+    }

 @app.post("/feedback")
 async def collect_feedback(feedback: Feedback, request: Request):
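The kept post-processing relies on the prompt's separator convention: list answers arrive as `item | item | item` and are reshaped into bullets. A standalone illustration; the answer text and limits are made-up placeholders:

```python
raw_answer = "CMD: up to ₹150 crore | Director: up to ₹75 crore | ED: up to ₹10 crore"

# Same reshaping as the handler above: split on '|', trim, bullet, rejoin.
items = raw_answer.split('|')
answer = "\n".join(f"* {item.strip()}" for item in items if item.strip())
print(answer)
# * CMD: up to ₹150 crore
# * Director: up to ₹75 crore
# * ED: up to ₹10 crore
```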
@@ -439,7 +251,3 @@ async def collect_feedback(feedback: Feedback, request: Request):
     }
     adapter.info(json.dumps(feedback_log))
     return {"status": "✅ Feedback recorded. Thank you!"}
-
-@app.on_event("shutdown")
-async def shutdown_event():
-    logger.info("Application shutting down.")
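After this commit, the public surface is `GET /`, `GET /health`, `POST /chat`, and `POST /feedback`. A minimal client sketch for `/chat`; host and port are placeholders, and only the `question` field of the request model is confirmed by this diff:

```python
# Hypothetical client for the /chat endpoint (host/port are placeholders).
import requests

resp = requests.post(
    "http://localhost:7860/chat",
    json={"question": "Who can approve works contracts?"},
    timeout=120,  # server-side generation may take up to LLM_TIMEOUT_SECONDS (90s)
)
resp.raise_for_status()
body = resp.json()
print(body["answer"])      # bulleted if the model used the '|' separator
print(body["request_id"])  # set by the add_request_id middleware
```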
 