Kalpokoch committed
Commit 7f343ef · verified · 1 Parent(s): 784b064

Update app/app.py

Files changed (1)
  1. app/app.py +184 -33
app/app.py CHANGED
@@ -6,7 +6,9 @@ import uuid
 import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
+from typing import Optional, Dict
 from llama_cpp import Llama
+
 # Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 
@@ -27,10 +29,10 @@ logger = logging.getLogger("app")
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
-TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "4"))
-TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "2"))
+TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
+TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))
 
 # -----------------------------
 # ✅ Initialize FastAPI App
@@ -55,12 +57,14 @@ try:
         top_k_default=TOP_K_SEARCH,
         relevance_threshold=RELEVANCE_THRESHOLD
     )
+
     if not ensure_db_populated(db, CHUNKS_FILE_PATH):
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
         logger.info("Vector DB is populated and ready.")
        db_ready = True
+
 except Exception as e:
     logger.error(f"FATAL: Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
@@ -92,6 +96,12 @@ except Exception as e:
 class Query(BaseModel):
     question: str
 
+class AdvancedQuery(BaseModel):
+    question: str
+    section_filter: Optional[str] = None
+    chunk_type_filter: Optional[str] = None
+    top_k: Optional[int] = None
+
 class Feedback(BaseModel):
     request_id: str
     question: str
@@ -101,11 +111,83 @@ class Feedback(BaseModel):
     comment: str | None = None
 
 # -----------------------------
-# ✅ Endpoints
+# ✅ Helper Functions
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
 
+def get_chunk_priority(chunk: Dict) -> int:
+    """Assign priority to different chunk types for better context selection"""
+    priority_order = [
+        'approval_authority',
+        'delegation_summary',
+        'requirement',
+        'method_specific',
+        'board_approval',
+        'financial_concurrence',
+        'composition'
+    ]
+    chunk_type = chunk['metadata'].get('chunk_type', 'unknown')
+    try:
+        return priority_order.index(chunk_type)
+    except ValueError:
+        return len(priority_order)  # Lower priority for unknown types
+
+def detect_filters(question_lower: str) -> tuple:
+    """Detect section and chunk type filters from user question"""
+    section_filter = None
+    chunk_type_filter = None
+
+    # Section keyword mapping
+    section_keywords = {
+        "annexure": "Annexure A",
+        "financial concurrence": "Financial Concurrence",
+        "guidelines": "Guidelines",
+        "section 1": "I", "section i": "I",
+        "section 2": "II", "section ii": "II",
+        "section 3": "III", "section iii": "III",
+        "section 4": "IV", "section iv": "IV"
+    }
+
+    # Chunk type keyword mapping
+    chunk_type_keywords = {
+        "approval": "approval_authority",
+        "delegation": "delegation_summary",
+        "requirement": "requirement",
+        "method": "method_specific",
+        "board": "board_approval",
+        "committee": "composition"
+    }
+
+    # Check for section filters
+    for keyword, section in section_keywords.items():
+        if keyword in question_lower:
+            section_filter = section
+            break
+
+    # Check for chunk type filters
+    for keyword, chunk_type in chunk_type_keywords.items():
+        if keyword in question_lower:
+            chunk_type_filter = chunk_type
+            break
+
+    return section_filter, chunk_type_filter
+
+async def generate_llm_response(prompt: str, request_id: str):
+    loop = asyncio.get_running_loop()
+    response = await loop.run_in_executor(
+        None,
+        lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
+    )
+
+    answer = response["choices"][0]["text"].strip()
+    if not answer:
+        raise ValueError("Empty response from LLM")
+    return answer
+
+# -----------------------------
+# ✅ Endpoints
+# -----------------------------
 @app.get("/")
 async def root():
     return {"status": "✅ Server is running."}
@@ -117,25 +199,16 @@ async def health_check():
         "database_status": "ready" if db_ready else "error",
         "model_status": "ready" if model_ready else "error"
     }
+
    if not db_ready or not model_ready:
         raise HTTPException(status_code=503, detail=status)
     return status
 
-async def generate_llm_response(prompt: str, request_id: str):
-    loop = asyncio.get_running_loop()
-    response = await loop.run_in_executor(
-        None,
-        lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
-    )
-    answer = response["choices"][0]["text"].strip()
-    if not answer:
-        raise ValueError("Empty response from LLM")
-    return answer
-
 @app.post("/chat")
 async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
-    question_lower = query.question.strip().lower()
+    question = query.question.strip()
+    question_lower = question.lower()
 
     # --- GREETING & INTRO HANDLING ---
     greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
@@ -159,8 +232,21 @@ async def chat(query: Query, request: Request):
 
     adapter.info(f"Received query: '{query.question}'")
 
-    # 1. Search Vector DB
-    search_results = db.search(query.question, top_k=TOP_K_SEARCH)
+    # 1. Enhanced Search with potential filtering
+    section_filter, chunk_type_filter = detect_filters(question_lower)
+
+    if section_filter or chunk_type_filter:
+        adapter.info(f"Detected filters - section: '{section_filter}', chunk_type: '{chunk_type_filter}'")
+        search_results = db.search_with_filters(
+            query.question,
+            top_k=TOP_K_SEARCH,
+            section_filter=section_filter,
+            chunk_type_filter=chunk_type_filter
+        )
+        adapter.info(f"Used filtered search")
+    else:
+        search_results = db.search(query.question, top_k=TOP_K_SEARCH)
+        adapter.info(f"Used regular search")
 
     if not search_results:
         adapter.warning("No relevant context found in vector DB.")
@@ -169,36 +255,69 @@ Your task is to answer the user's question based ONLY on the provided context.
             "context_used": "No relevant context found.",
             "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
         }
-
+
+    # 2. Enhanced logging of retrieved chunks
+    chunk_types = [result['metadata'].get('chunk_type', 'unknown') for result in search_results]
+    sections = [result['metadata'].get('section', 'unknown') for result in search_results]
     scores = [f"{result['relevance_score']:.4f}" for result in search_results]
-    adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
+
+    adapter.info(f"Found {len(search_results)} relevant chunks")
+    adapter.info(f"Chunk types: {chunk_types}")
+    adapter.info(f"Sections: {sections}")
+    adapter.info(f"Relevance scores: {scores}")
 
-    # 2. Prepare Context
-    context_chunks = [result['text'] for result in search_results[:TOP_K_CONTEXT]]
+    # 3. Prioritize chunk types for better context selection
+    prioritized_results = sorted(search_results, key=lambda x: (get_chunk_priority(x), -x['relevance_score']))
+
+    # Log prioritization results
+    prioritized_types = [result['metadata'].get('chunk_type', 'unknown') for result in prioritized_results]
+    adapter.info(f"Prioritized chunk types order: {prioritized_types}")
+
+    # 4. Prepare Context using prioritized results
+    context_chunks = [result['text'] for result in prioritized_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)
+
+    # 5. Enhanced context logging
+    context_metadata = []
+    for result in prioritized_results[:TOP_K_CONTEXT]:
+        metadata = result['metadata']
+        context_info = {
+            'section': metadata.get('section', 'unknown'),
+            'clause': metadata.get('clause', 'unknown'),
+            'chunk_type': metadata.get('chunk_type', 'unknown'),
+            'score': f"{result['relevance_score']:.4f}"
+        }
+        context_metadata.append(context_info)
 
-    # 3. Build Prompt with Separator Instruction
+    adapter.info(f"Selected context metadata: {context_metadata}")
+
+    # 6. Build Prompt
     prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
 
 - **Formatting Rule:** If the answer contains a list of items or steps, you **MUST** separate each item with a pipe symbol (`|`). For example: `First item|Second item|Third item`.
+
 - **Content Rule:** If the information is not in the provided context, you **MUST** reply with the exact phrase: "The provided policy context does not contain information on this topic."
-</s>
+
 <|user|>
+
 ### Relevant Context:
-```
+
 {context}
 ```
 
 ### Question:
+
 {query.question}
-</s>
+
 <|assistant|>
+
 ### Detailed Answer:
+
 """
 
-    # 4. Generate Response
+    # 7. Generate Response
     answer = "An error occurred while processing your request."
     try:
         adapter.info("Sending prompt to LLM for generation...")
@@ -206,20 +325,16 @@ Your task is to answer the user's question based ONLY on the provided context.
             generate_llm_response(prompt, request.state.request_id),
             timeout=LLM_TIMEOUT_SECONDS
         )
-        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
 
+        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
+
         # --- POST-PROCESSING LOGIC ---
-        # Check if the model used the pipe separator, indicating a list.
         if '|' in raw_answer:
             adapter.info("Pipe separator found. Formatting response as a bulleted list.")
-            # Split the string into a list of items
             items = raw_answer.split('|')
-            # Clean up each item and format it as a bullet point
             cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
-            # Join them back together with newlines
            answer = "\n".join(cleaned_items)
         else:
-            # If no separator, use the answer as is.
             answer = raw_answer
 
     except asyncio.TimeoutError:
@@ -230,6 +345,7 @@ Your task is to answer the user's question based ONLY on the provided context.
         answer = "Sorry, an unexpected error occurred while generating a response."
 
     adapter.info(f"Final answer prepared. Returning to client.")
+
     return {
         "request_id": request.state.request_id,
        "question": query.question,
@@ -237,6 +353,40 @@ Your task is to answer the user's question based ONLY on the provided context.
         "answer": answer
     }
 
+@app.post("/advanced_search")
+async def advanced_search(query: AdvancedQuery, request: Request):
+    """Advanced search endpoint with explicit filters"""
+    adapter = get_logger_adapter(request)
+
+    if not db_ready:
+        raise HTTPException(status_code=503, detail="Database not ready")
+
+    adapter.info(f"Advanced search: question='{query.question}', section='{query.section_filter}', chunk_type='{query.chunk_type_filter}'")
+
+    search_results = db.search_with_filters(
+        query.question,
+        top_k=query.top_k or TOP_K_SEARCH,
+        section_filter=query.section_filter,
+        chunk_type_filter=query.chunk_type_filter
+    )
+
+    return {
+        "request_id": request.state.request_id,
+        "query": query.question,
+        "filters": {
+            "section": query.section_filter,
+            "chunk_type": query.chunk_type_filter
+        },
+        "results": [
+            {
+                "text": result['text'],
+                "metadata": result['metadata'],
+                "relevance_score": result['relevance_score']
+            }
+            for result in search_results
+        ]
+    }
+
 @app.post("/feedback")
 async def collect_feedback(feedback: Feedback, request: Request):
     adapter = get_logger_adapter(request)
@@ -249,5 +399,6 @@ async def collect_feedback(feedback: Feedback, request: Request):
         "feedback": feedback.feedback,
         "comment": feedback.comment
     }
+
     adapter.info(json.dumps(feedback_log))
     return {"status": "✅ Feedback recorded. Thank you!"}
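For reference, here is a minimal client sketch against the two endpoints this commit touches (`/chat` and the new `/advanced_search`). It is not part of the commit: the base URL, the use of the `requests` package, the sample question, and the filter values are illustrative assumptions; only the request and response field names come from the code above.

```python
# Minimal client sketch (not part of this commit). Assumes the app is served
# at http://localhost:8000 and that the `requests` package is installed;
# the sample question and filter values are illustrative only.
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port

# 1) Regular RAG query: /chat infers section/chunk-type filters from keywords.
chat_resp = requests.post(
    f"{BASE_URL}/chat",
    json={"question": "What is the approval authority for open tenders under Annexure A?"},
    timeout=120,
)
chat_resp.raise_for_status()
print(chat_resp.json()["answer"])

# 2) Explicit filters via the new /advanced_search endpoint (AdvancedQuery model).
search_resp = requests.post(
    f"{BASE_URL}/advanced_search",
    json={
        "question": "approval authority for open tenders",
        "section_filter": "Annexure A",             # optional
        "chunk_type_filter": "approval_authority",  # optional
        "top_k": 3,                                 # optional, defaults to TOP_K_SEARCH
    },
    timeout=60,
)
search_resp.raise_for_status()
for result in search_resp.json()["results"]:
    print(result["relevance_score"], result["metadata"].get("chunk_type"), result["text"][:80])
```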