Kalpokoch committed
Commit f1d5824 · verified · 1 Parent(s): d5b1ff4

Update app/app.py

Files changed (1)
  1. app/app.py +53 -56
app/app.py CHANGED
@@ -1,4 +1,3 @@
-# app.py
 import os
 import json
 import asyncio
@@ -8,12 +7,13 @@ import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
+# Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 
 # -----------------------------
-# Logging Configuration - minimal logging for performance
+# ✅ Logging Configuration
 # -----------------------------
-logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - [%(request_id)s] - %(message)s')
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s')
 
 class RequestIdAdapter(logging.LoggerAdapter):
     def process(self, msg, kwargs):
@@ -22,18 +22,18 @@ class RequestIdAdapter(logging.LoggerAdapter):
 logger = logging.getLogger("app")
 
 # -----------------------------
-# Configuration
+# ✅ Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
 TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
 TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))
 
 # -----------------------------
-# Initialize FastAPI App
+# ✅ Initialize FastAPI App
 # -----------------------------
 app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")
 
@@ -46,8 +46,9 @@ async def add_request_id(request: Request, call_next):
     return response
 
 # -----------------------------
-# Vector DB and Data Initialization
+# ✅ Vector DB and Data Initialization
 # -----------------------------
+logger.info("Initializing vector DB...")
 try:
     db = PolicyVectorDB(
         persist_directory=DB_PERSIST_DIRECTORY,
@@ -58,15 +59,17 @@ try:
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
+        logger.info("Vector DB is populated and ready.")
         db_ready = True
 except Exception as e:
-    logger.error(f"Failed to initialize Vector DB: {e}", exc_info=True)
+    logger.error(f"FATAL: Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
     db_ready = False
 
 # -----------------------------
-# Load TinyLlama GGUF Model
+# ✅ Load TinyLlama GGUF Model
 # -----------------------------
+logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
@@ -76,14 +79,15 @@ try:
         use_mlock=True,
         verbose=False
     )
+    logger.info("GGUF model loaded successfully.")
     model_ready = True
 except Exception as e:
-    logger.error(f"Failed to load GGUF model: {e}", exc_info=True)
+    logger.error(f"FATAL: Failed to load GGUF model: {e}", exc_info=True)
     llm = None
     model_ready = False
 
 # -----------------------------
-# API Schemas
+# ✅ API Schemas
 # -----------------------------
 class Query(BaseModel):
     question: str
@@ -97,28 +101,7 @@ class Feedback(BaseModel):
     comment: str | None = None
 
 # -----------------------------
-# Helpers for Hybrid Filtering
-# -----------------------------
-# Minimal stopwords list for English
-STOPWORDS = {
-    "the", "of", "and", "is", "in", "for", "on", "to", "with", "a", "at",
-    "by", "an", "as", "be", "this", "that", "which", "or", "from", "are", "has"
-}
-
-def extract_keywords(query: str) -> list[str]:
-    tokens = re.findall(r'\w+', query.lower())
-    keywords = [tok for tok in tokens if tok not in STOPWORDS and len(tok) > 2]
-    return keywords
-
-def matches_keyword(chunk: dict, keywords: list[str]) -> bool:
-    text = chunk.get("text", "").lower()
-    metadata = chunk.get("metadata", {})
-    combined_meta = " ".join(str(v).lower() for v in metadata.values() if v)
-    combined = f"{text} {combined_meta}"
-    return any(kw in combined for kw in keywords)
-
-# -----------------------------
-# Endpoints
+# ✅ Endpoints
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
@@ -154,9 +137,10 @@ async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
     question_lower = query.question.strip().lower()
 
-    # Greeting handling
-    greeting_keywords = {"hello", "hi", "hey", "what can you do", "who are you"}
+    # --- GREETING & INTRO HANDLING ---
+    greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
     if question_lower in greeting_keywords:
+        adapter.info(f"Handling a greeting or introductory query: '{query.question}'")
         intro_message = (
             "Hello! I am an AI assistant specifically trained on NEEPCO's Delegation of Powers (DoP) policy document. "
             "My purpose is to help you find accurate information and answer questions based on this specific dataset. "
@@ -170,30 +154,30 @@
     }
 
     if not db_ready or not model_ready:
+        adapter.error("Service unavailable due to initialization failure.")
         raise HTTPException(status_code=503, detail="Service is not ready. Please check logs.")
 
-    # Step 1: Search vector DB
+    adapter.info(f"Received query: '{query.question}'")
+
+    # 1. Search Vector DB
     search_results = db.search(query.question, top_k=TOP_K_SEARCH)
+
     if not search_results:
+        adapter.warning("No relevant context found in vector DB.")
         return {
            "question": query.question,
            "context_used": "No relevant context found.",
            "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
        }
+
+    scores = [f"{result['relevance_score']:.4f}" for result in search_results]
+    adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
 
-    # Step 2: Extract keywords from query
-    query_keywords = extract_keywords(query.question)
-
-    # Step 3: Keyword + metadata filtering
-    filtered_results = [chunk for chunk in search_results if matches_keyword(chunk, query_keywords)]
-    # Fallback to original results if filtering empty
-    final_results = filtered_results if filtered_results else search_results
-
-    # Step 4: Prepare context with top chunks
-    context_chunks = [res['text'] for res in final_results[:TOP_K_CONTEXT]]
+    # 2. Prepare Context
+    context_chunks = [result['text'] for result in search_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)
-
-    # Step 5: Build prompt
+
+    # 3. Build Prompt with Separator Instruction
     prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
@@ -203,7 +187,6 @@ Your task is to answer the user's question based ONLY on the provided context.
 </s>
 <|user|>
 ### Relevant Context:
-
 ```
 {context}
 ```
@@ -215,24 +198,38 @@ Your task is to answer the user's question based ONLY on the provided context.
 ### Detailed Answer:
 """
 
-    # Step 6: Generate response from LLM
+    # 4. Generate Response
+    answer = "An error occurred while processing your request."
     try:
+        adapter.info("Sending prompt to LLM for generation...")
         raw_answer = await asyncio.wait_for(
            generate_llm_response(prompt, request.state.request_id),
            timeout=LLM_TIMEOUT_SECONDS
        )
-        # Format answer if pipe separator found
-        if "|" in raw_answer:
-            items = raw_answer.split("|")
+        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
+
+        # --- POST-PROCESSING LOGIC ---
+        # Check if the model used the pipe separator, indicating a list.
+        if '|' in raw_answer:
+            adapter.info("Pipe separator found. Formatting response as a bulleted list.")
+            # Split the string into a list of items
+            items = raw_answer.split('|')
+            # Clean up each item and format it as a bullet point
             cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
+            # Join them back together with newlines
             answer = "\n".join(cleaned_items)
         else:
+            # If no separator, use the answer as is.
             answer = raw_answer
+
     except asyncio.TimeoutError:
+        adapter.warning(f"LLM generation timed out after {LLM_TIMEOUT_SECONDS} seconds.")
         answer = "Sorry, the request took too long to process. Please try again with a simpler question."
-    except Exception:
+    except Exception as e:
+        adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
         answer = "Sorry, an unexpected error occurred while generating a response."
 
+    adapter.info(f"Final answer prepared. Returning to client.")
     return {
        "request_id": request.state.request_id,
        "question": query.question,
@@ -252,5 +249,5 @@ async def collect_feedback(feedback: Feedback, request: Request):
        "feedback": feedback.feedback,
        "comment": feedback.comment
    }
-    logger.info(json.dumps(feedback_log))
-    return {"status": "✅ Feedback recorded. Thank you!"}
+    adapter.info(json.dumps(feedback_log))
+    return {"status": "✅ Feedback recorded. Thank you!"}