Kalpokoch committed on
Commit
012e230
·
verified ·
1 Parent(s): d2159e2

Update app/app.py

Files changed (1)
  1. app/app.py +49 -68
app/app.py CHANGED
@@ -8,13 +8,12 @@ import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
-# Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated

 # -----------------------------
-# ✅ Logging Configuration
+# Logging Configuration - minimal logging for performance
 # -----------------------------
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s')
+logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - [%(request_id)s] - %(message)s')

 class RequestIdAdapter(logging.LoggerAdapter):
     def process(self, msg, kwargs):
@@ -23,18 +22,18 @@ class RequestIdAdapter(logging.LoggerAdapter):
 logger = logging.getLogger("app")

 # -----------------------------
-# ✅ Configuration
+# Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
 TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
 TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))

 # -----------------------------
-# ✅ Initialize FastAPI App
+# Initialize FastAPI App
 # -----------------------------
 app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")

@@ -47,9 +46,8 @@ async def add_request_id(request: Request, call_next):
     return response

 # -----------------------------
-# ✅ Vector DB and Data Initialization
+# Vector DB and Data Initialization
 # -----------------------------
-logger.info("Initializing vector DB...")
 try:
     db = PolicyVectorDB(
         persist_directory=DB_PERSIST_DIRECTORY,
@@ -60,17 +58,15 @@ try:
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
-        logger.info("Vector DB is populated and ready.")
         db_ready = True
 except Exception as e:
-    logger.error(f"FATAL: Failed to initialize Vector DB: {e}", exc_info=True)
+    logger.error(f"Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
     db_ready = False

 # -----------------------------
-# ✅ Load TinyLlama GGUF Model
+# Load TinyLlama GGUF Model
 # -----------------------------
-logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
@@ -80,15 +76,14 @@ try:
         use_mlock=True,
         verbose=False
     )
-    logger.info("GGUF model loaded successfully.")
     model_ready = True
 except Exception as e:
-    logger.error(f"FATAL: Failed to load GGUF model: {e}", exc_info=True)
+    logger.error(f"Failed to load GGUF model: {e}", exc_info=True)
     llm = None
     model_ready = False

 # -----------------------------
-# ✅ API Schemas
+# API Schemas
 # -----------------------------
 class Query(BaseModel):
     question: str
@@ -102,21 +97,28 @@ class Feedback(BaseModel):
     comment: str | None = None

 # -----------------------------
-# ✅ Helper for metadata-based filtering
+# Helpers for Hybrid Filtering
 # -----------------------------
-def is_personnel_related(metadata: dict) -> bool:
-    # Keywords indicating personnel or HR related policy sections
-    personnel_keywords = [
-        "ii", "personnel", "recruitment", "resignation", "hr", "promotion",
-        "employee", "staff", "service", "termination", "transfer"
-    ]
-    section = str(metadata.get("section", "")).lower()
-    title = str(metadata.get("title", "")).lower()
-    # Return True if any keyword is found in section or title
-    return any(kw in section for kw in personnel_keywords) or any(kw in title for kw in personnel_keywords)
+# Minimal stopwords list for English
+STOPWORDS = {
+    "the", "of", "and", "is", "in", "for", "on", "to", "with", "a", "at",
+    "by", "an", "as", "be", "this", "that", "which", "or", "from", "are", "has"
+}
+
+def extract_keywords(query: str) -> list[str]:
+    tokens = re.findall(r'\w+', query.lower())
+    keywords = [tok for tok in tokens if tok not in STOPWORDS and len(tok) > 2]
+    return keywords
+
+def matches_keyword(chunk: dict, keywords: list[str]) -> bool:
+    text = chunk.get("text", "").lower()
+    metadata = chunk.get("metadata", {})
+    combined_meta = " ".join(str(v).lower() for v in metadata.values() if v)
+    combined = f"{text} {combined_meta}"
+    return any(kw in combined for kw in keywords)

 # -----------------------------
-# ✅ Endpoints
+# Endpoints
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
@@ -152,10 +154,9 @@ async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
     question_lower = query.question.strip().lower()

-    # --- GREETING & INTRO HANDLING ---
-    greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
+    # Greeting handling
+    greeting_keywords = {"hello", "hi", "hey", "what can you do", "who are you"}
     if question_lower in greeting_keywords:
-        adapter.info(f"Handling a greeting or introductory query: '{query.question}'")
         intro_message = (
             "Hello! I am an AI assistant specifically trained on NEEPCO's Delegation of Powers (DoP) policy document. "
             "My purpose is to help you find accurate information and answer questions based on this specific dataset. "
@@ -169,40 +170,30 @@ async def chat(query: Query, request: Request):
         }

     if not db_ready or not model_ready:
-        adapter.error("Service unavailable due to initialization failure.")
         raise HTTPException(status_code=503, detail="Service is not ready. Please check logs.")

-    adapter.info(f"Received query: '{query.question}'")
-
-    # 1. Search Vector DB
+    # Step 1: Search vector DB
     search_results = db.search(query.question, top_k=TOP_K_SEARCH)
-
     if not search_results:
-        adapter.warning("No relevant context found in vector DB.")
         return {
             "question": query.question,
             "context_used": "No relevant context found.",
             "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
         }
-
-    # Post-search metadata-based filtering for personnel/HR queries
-    if any(keyword in question_lower for keyword in ["personnel", "hr", "recruitment", "resignation",
-                                                     "promotion", "employee", "termination", "transfer"]):
-        filtered_results = [res for res in search_results if is_personnel_related(res.get('metadata', {}))]
-        if filtered_results:
-            adapter.info(f"Filtered {len(search_results) - len(filtered_results)} irrelevant chunks for personnel query.")
-            search_results = filtered_results
-        else:
-            adapter.info("No personnel-related chunks found after filtering; using unfiltered results.")

-    scores = [f"{result['relevance_score']:.4f}" for result in search_results]
-    adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
+    # Step 2: Extract keywords from query
+    query_keywords = extract_keywords(query.question)
+
+    # Step 3: Keyword + metadata filtering
+    filtered_results = [chunk for chunk in search_results if matches_keyword(chunk, query_keywords)]
+    # Fallback to original results if filtering empty
+    final_results = filtered_results if filtered_results else search_results

-    # 2. Prepare Context
-    context_chunks = [result['text'] for result in search_results[:TOP_K_CONTEXT]]
+    # Step 4: Prepare context with top chunks
+    context_chunks = [res['text'] for res in final_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)

-    # 3. Build Prompt with Separator Instruction
+    # Step 5: Build prompt
     prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
@@ -224,34 +215,24 @@ Your task is to answer the user's question based ONLY on the provided context.
 ### Detailed Answer:
 """

-    # 4. Generate Response
-    answer = "An error occurred while processing your request."
+    # Step 6: Generate response from LLM
     try:
-        adapter.info("Sending prompt to LLM for generation...")
         raw_answer = await asyncio.wait_for(
             generate_llm_response(prompt, request.state.request_id),
             timeout=LLM_TIMEOUT_SECONDS
         )
-        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
-
-        # --- POST-PROCESSING LOGIC ---
-        # Check if the model used the pipe separator, indicating a list.
-        if '|' in raw_answer:
-            adapter.info("Pipe separator found. Formatting response as a bulleted list.")
-            items = raw_answer.split('|')
+        # Format answer if pipe separator found
+        if "|" in raw_answer:
+            items = raw_answer.split("|")
             cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
             answer = "\n".join(cleaned_items)
         else:
             answer = raw_answer
-
     except asyncio.TimeoutError:
-        adapter.warning(f"LLM generation timed out after {LLM_TIMEOUT_SECONDS} seconds.")
         answer = "Sorry, the request took too long to process. Please try again with a simpler question."
-    except Exception as e:
-        adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
+    except Exception:
         answer = "Sorry, an unexpected error occurred while generating a response."

-    adapter.info(f"Final answer prepared. Returning to client.")
     return {
         "request_id": request.state.request_id,
         "question": query.question,
@@ -271,5 +252,5 @@ async def collect_feedback(feedback: Feedback, request: Request):
         "feedback": feedback.feedback,
         "comment": feedback.comment
     }
-    adapter.info(json.dumps(feedback_log))
-    return {"status": "✅ Feedback recorded. Thank you!"}
+    logger.info(json.dumps(feedback_log))
+    return {"status": "✅ Feedback recorded. Thank you!"}
 
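Step 6's pipe-separator handling is a small deterministic post-process. Here is the same logic lifted into a standalone function for illustration, applied to a hypothetical raw model output:

def format_answer(raw_answer: str) -> str:
    # A '|' in the raw output signals a list; reflow it into bullets
    if "|" in raw_answer:
        items = raw_answer.split("|")
        return "\n".join(f"* {item.strip()}" for item in items if item.strip())
    return raw_answer

print(format_answer("CMD approves | Director recommends | GM initiates"))
# * CMD approves
# * Director recommends
# * GM initiates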