Update app/app.py
app/app.py  +49 -68  CHANGED
@@ -8,13 +8,12 @@ import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
-# Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated

 # -----------------------------
-#
+# Logging Configuration - minimal logging for performance
 # -----------------------------
-logging.basicConfig(level=logging.
+logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - [%(request_id)s] - %(message)s')

 class RequestIdAdapter(logging.LoggerAdapter):
     def process(self, msg, kwargs):
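Note: the new WARNING-level format string references %(request_id)s, a field that plain LogRecords do not carry; it only resolves because every log call is routed through RequestIdAdapter. The adapter's process() body falls outside the hunks shown here, so the following is an assumed, minimal implementation of that pattern, not the file's actual code:

    import logging

    logging.basicConfig(level=logging.WARNING,
                        format='%(asctime)s - %(levelname)s - [%(request_id)s] - %(message)s')

    class RequestIdAdapter(logging.LoggerAdapter):
        def process(self, msg, kwargs):
            # Inject the adapter's extra dict into every record (assumed body).
            kwargs["extra"] = {**self.extra, **kwargs.get("extra", {})}
            return msg, kwargs

    log = RequestIdAdapter(logging.getLogger("app"), {"request_id": "demo-123"})
    log.warning("model not ready")  # ... - WARNING - [demo-123] - model not ready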
@@ -23,18 +22,18 @@ class RequestIdAdapter(logging.LoggerAdapter):
 logger = logging.getLogger("app")

 # -----------------------------
-#
+# Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
 TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
 TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))

 # -----------------------------
-#
+# Initialize FastAPI App
 # -----------------------------
 app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")

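Note: every knob here is read from the environment at import time, so a deployment can retune retrieval and timeouts without rebuilding the image. A small sketch (values illustrative only):

    import os

    # Must run before `app.app` is imported, since the module reads env at import time.
    os.environ["LLM_TIMEOUT_SECONDS"] = "120"  # allow slower generations
    os.environ["TOP_K_SEARCH"] = "5"           # retrieve more candidates to filter
    os.environ["TOP_K_CONTEXT"] = "2"          # feed two chunks into the prompt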
@@ -47,9 +46,8 @@ async def add_request_id(request: Request, call_next):
     return response

 # -----------------------------
-#
+# Vector DB and Data Initialization
 # -----------------------------
-logger.info("Initializing vector DB...")
 try:
     db = PolicyVectorDB(
         persist_directory=DB_PERSIST_DIRECTORY,
@@ -60,17 +58,15 @@ try:
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
-        logger.info("Vector DB is populated and ready.")
         db_ready = True
 except Exception as e:
-    logger.error(f"
+    logger.error(f"Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
     db_ready = False

 # -----------------------------
-#
+# Load TinyLlama GGUF Model
 # -----------------------------
-logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
@@ -80,15 +76,14 @@ try:
         use_mlock=True,
         verbose=False
     )
-    logger.info("GGUF model loaded successfully.")
     model_ready = True
 except Exception as e:
-    logger.error(f"
+    logger.error(f"Failed to load GGUF model: {e}", exc_info=True)
     llm = None
     model_ready = False

 # -----------------------------
-#
+# API Schemas
 # -----------------------------
 class Query(BaseModel):
     question: str
@@ -102,21 +97,28 @@ class Feedback(BaseModel):
     comment: str | None = None

 # -----------------------------
-#
+# Helpers for Hybrid Filtering
 # -----------------------------
-
-
-
-
-
-
-
-
-
-    return
+# Minimal stopwords list for English
+STOPWORDS = {
+    "the", "of", "and", "is", "in", "for", "on", "to", "with", "a", "at",
+    "by", "an", "as", "be", "this", "that", "which", "or", "from", "are", "has"
+}
+
+def extract_keywords(query: str) -> list[str]:
+    tokens = re.findall(r'\w+', query.lower())
+    keywords = [tok for tok in tokens if tok not in STOPWORDS and len(tok) > 2]
+    return keywords
+
+def matches_keyword(chunk: dict, keywords: list[str]) -> bool:
+    text = chunk.get("text", "").lower()
+    metadata = chunk.get("metadata", {})
+    combined_meta = " ".join(str(v).lower() for v in metadata.values() if v)
+    combined = f"{text} {combined_meta}"
+    return any(kw in combined for kw in keywords)

 # -----------------------------
-#
+# Endpoints
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
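Note: the two new helpers are pure functions, so they are easy to sanity-check. An illustrative run, assuming the definitions above are in scope; the chunk dict shape ("text" plus "metadata") mirrors what the chat handler below expects from db.search:

    chunk = {
        "text": "Powers delegated for award of works contracts...",
        "metadata": {"section": "Contracts", "authority": "CMD"},
    }
    kws = extract_keywords("What powers are delegated for contract award?")
    # Stopwords and tokens shorter than 3 characters are dropped:
    # ['what', 'powers', 'delegated', 'contract', 'award']
    print(matches_keyword(chunk, kws))  # True: 'powers', 'delegated', 'contract' occur in the text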
@@ -152,10 +154,9 @@ async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
     question_lower = query.question.strip().lower()

-    #
-    greeting_keywords =
+    # Greeting handling
+    greeting_keywords = {"hello", "hi", "hey", "what can you do", "who are you"}
     if question_lower in greeting_keywords:
-        adapter.info(f"Handling a greeting or introductory query: '{query.question}'")
         intro_message = (
             "Hello! I am an AI assistant specifically trained on NEEPCO's Delegation of Powers (DoP) policy document. "
             "My purpose is to help you find accurate information and answer questions based on this specific dataset. "
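Note: the greeting branch is exact set membership on the stripped, lower-cased question, not substring matching, so near-greetings still go through retrieval:

    greeting_keywords = {"hello", "hi", "hey", "what can you do", "who are you"}
    print("  Hello ".strip().lower() in greeting_keywords)     # True  -> canned intro
    print("hello there".strip().lower() in greeting_keywords)  # False -> full RAG pipeline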
@@ -169,40 +170,30 @@ async def chat(query: Query, request: Request):
         }

     if not db_ready or not model_ready:
-        adapter.error("Service unavailable due to initialization failure.")
         raise HTTPException(status_code=503, detail="Service is not ready. Please check logs.")

-
-
-    # 1. Search Vector DB
+    # Step 1: Search vector DB
     search_results = db.search(query.question, top_k=TOP_K_SEARCH)
-
     if not search_results:
-        adapter.warning("No relevant context found in vector DB.")
         return {
             "question": query.question,
             "context_used": "No relevant context found.",
             "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
         }
-
-    # Post-search metadata-based filtering for personnel/HR queries
-    if any(keyword in question_lower for keyword in ["personnel", "hr", "recruitment", "resignation",
-                                                     "promotion", "employee", "termination", "transfer"]):
-        filtered_results = [res for res in search_results if is_personnel_related(res.get('metadata', {}))]
-        if filtered_results:
-            adapter.info(f"Filtered {len(search_results) - len(filtered_results)} irrelevant chunks for personnel query.")
-            search_results = filtered_results
-        else:
-            adapter.info("No personnel-related chunks found after filtering; using unfiltered results.")

-
-
+    # Step 2: Extract keywords from query
+    query_keywords = extract_keywords(query.question)
+
+    # Step 3: Keyword + metadata filtering
+    filtered_results = [chunk for chunk in search_results if matches_keyword(chunk, query_keywords)]
+    # Fallback to original results if filtering empty
+    final_results = filtered_results if filtered_results else search_results

-    #
-    context_chunks = [
+    # Step 4: Prepare context with top chunks
+    context_chunks = [res['text'] for res in final_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)

-    #
+    # Step 5: Build prompt
     prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
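Note: the behavioral core of this change is Step 3's fallback: keyword filtering can narrow the retrieved set but never empty it. A minimal illustration with invented chunks, assuming matches_keyword from the helper hunk:

    search_results = [
        {"text": "Delegation of powers for works contracts", "metadata": {"section": "Works"}},
        {"text": "Tour advance sanction limits", "metadata": {"section": "Finance"}},
    ]
    keywords = ["recruitment"]  # matches no chunk
    filtered_results = [c for c in search_results if matches_keyword(c, keywords)]
    final_results = filtered_results if filtered_results else search_results
    assert final_results == search_results  # empty filter -> fall back to all retrieved chunks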
@@ -224,34 +215,24 @@ Your task is to answer the user's question based ONLY on the provided context.
 ### Detailed Answer:
 """

-    #
-    answer = "An error occurred while processing your request."
+    # Step 6: Generate response from LLM
     try:
-        adapter.info("Sending prompt to LLM for generation...")
         raw_answer = await asyncio.wait_for(
             generate_llm_response(prompt, request.state.request_id),
             timeout=LLM_TIMEOUT_SECONDS
         )
-
-
-
-        # Check if the model used the pipe separator, indicating a list.
-        if '|' in raw_answer:
-            adapter.info("Pipe separator found. Formatting response as a bulleted list.")
-            items = raw_answer.split('|')
+        # Format answer if pipe separator found
+        if "|" in raw_answer:
+            items = raw_answer.split("|")
             cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
             answer = "\n".join(cleaned_items)
         else:
             answer = raw_answer
-
     except asyncio.TimeoutError:
-        adapter.warning(f"LLM generation timed out after {LLM_TIMEOUT_SECONDS} seconds.")
         answer = "Sorry, the request took too long to process. Please try again with a simpler question."
-    except Exception as e:
-        adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
+    except Exception:
        answer = "Sorry, an unexpected error occurred while generating a response."

-    adapter.info(f"Final answer prepared. Returning to client.")
     return {
         "request_id": request.state.request_id,
         "question": query.question,
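Note: the pipe-separator post-processing is easy to verify in isolation (sample model output invented for illustration):

    raw_answer = "Approve works contracts | Sanction tour advances | Sign MoUs"
    items = raw_answer.split("|")
    cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
    answer = "\n".join(cleaned_items)
    # answer:
    # * Approve works contracts
    # * Sanction tour advances
    # * Sign MoUs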
@@ -271,5 +252,5 @@ async def collect_feedback(feedback: Feedback, request: Request):
         "feedback": feedback.feedback,
         "comment": feedback.comment
     }
-
-    return {"status": "✅ Feedback recorded. Thank you!"}
+    logger.info(json.dumps(feedback_log))
+    return {"status": "✅ Feedback recorded. Thank you!"}
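Note: end to end, the changed handlers can be smoke-tested with any HTTP client. The route decorators fall outside the hunks shown here, so the paths and the Feedback payload fields below are assumptions, not confirmed by this diff:

    import httpx

    base = "http://localhost:7860"  # typical Hugging Face Spaces port; adjust as needed
    r = httpx.post(f"{base}/chat", json={"question": "hello"}, timeout=120.0)
    print(r.json()["answer"])  # exact greeting -> canned intro, no retrieval
    r = httpx.post(f"{base}/feedback", json={"feedback": "up", "comment": "useful"})
    print(r.json()["status"])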