Spaces:

Kalpokoch
/

ChatbotDemo

Running

App Files

Kalpokoch committed on Aug 21, 2025

Commit

f1d5824

verified ·

1 Parent(s): d5b1ff4

Update app/app.py

Browse files

Files changed (1) hide show

app/app.py +53 -56

app/app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# app.py
 import os
 import json
 import asyncio
@@ -8,12 +7,13 @@ import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 # -----------------------------
-# Logging Configuration - minimal logging for performance
 # -----------------------------
-logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - [%(request_id)s] - %(message)s')
 class RequestIdAdapter(logging.LoggerAdapter):
     def process(self, msg, kwargs):
@@ -22,18 +22,18 @@ class RequestIdAdapter(logging.LoggerAdapter):
 logger = logging.getLogger("app")
 # -----------------------------
-# Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
 TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
 TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))
 # -----------------------------
-# Initialize FastAPI App
 # -----------------------------
 app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")
@@ -46,8 +46,9 @@ async def add_request_id(request: Request, call_next):
     return response
 # -----------------------------
-# Vector DB and Data Initialization
 # -----------------------------
 try:
     db = PolicyVectorDB(
         persist_directory=DB_PERSIST_DIRECTORY,
@@ -58,15 +59,17 @@ try:
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
         db_ready = True
 except Exception as e:
-    logger.error(f"Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
     db_ready = False
 # -----------------------------
-# Load TinyLlama GGUF Model
 # -----------------------------
 try:
     llm = Llama(
         model_path=MODEL_PATH,
@@ -76,14 +79,15 @@ try:
         use_mlock=True,
         verbose=False
     )
     model_ready = True
 except Exception as e:
-    logger.error(f"Failed to load GGUF model: {e}", exc_info=True)
     llm = None
     model_ready = False
 # -----------------------------
-# API Schemas
 # -----------------------------
 class Query(BaseModel):
     question: str
@@ -97,28 +101,7 @@ class Feedback(BaseModel):
     comment: str | None = None
 # -----------------------------
-# Helpers for Hybrid Filtering
-# -----------------------------
-# Minimal stopwords list for English
-STOPWORDS = {
-    "the", "of", "and", "is", "in", "for", "on", "to", "with", "a", "at",
-    "by", "an", "as", "be", "this", "that", "which", "or", "from", "are", "has"
-}
-def extract_keywords(query: str) -> list[str]:
-    tokens = re.findall(r'\w+', query.lower())
-    keywords = [tok for tok in tokens if tok not in STOPWORDS and len(tok) > 2]
-    return keywords
-def matches_keyword(chunk: dict, keywords: list[str]) -> bool:
-    text = chunk.get("text", "").lower()
-    metadata = chunk.get("metadata", {})
-    combined_meta = " ".join(str(v).lower() for v in metadata.values() if v)
-    combined = f"{text} {combined_meta}"
-    return any(kw in combined for kw in keywords)
-# -----------------------------
-# Endpoints
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
@@ -154,9 +137,10 @@ async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
     question_lower = query.question.strip().lower()
-    # Greeting handling
-    greeting_keywords = {"hello", "hi", "hey", "what can you do", "who are you"}
     if question_lower in greeting_keywords:
         intro_message = (
             "Hello! I am an AI assistant specifically trained on NEEPCO's Delegation of Powers (DoP) policy document. "
             "My purpose is to help you find accurate information and answer questions based on this specific dataset. "
@@ -170,30 +154,30 @@ async def chat(query: Query, request: Request):
         }
     if not db_ready or not model_ready:
         raise HTTPException(status_code=503, detail="Service is not ready. Please check logs.")
-    # Step 1: Search vector DB
     search_results = db.search(query.question, top_k=TOP_K_SEARCH)
     if not search_results:
         return {
             "question": query.question,
             "context_used": "No relevant context found.",
             "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
         }
-    # Step 2: Extract keywords from query
-    query_keywords = extract_keywords(query.question)
-    # Step 3: Keyword + metadata filtering
-    filtered_results = [chunk for chunk in search_results if matches_keyword(chunk, query_keywords)]
-    # Fallback to original results if filtering empty
-    final_results = filtered_results if filtered_results else search_results
-    # Step 4: Prepare context with top chunks
-    context_chunks = [res['text'] for res in final_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)
-    # Step 5: Build prompt
     prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
@@ -203,7 +187,6 @@ Your task is to answer the user's question based ONLY on the provided context.
 </s>
 <|user|>
 ### Relevant Context:
 ```
 {context}
 ```
@@ -215,24 +198,38 @@ Your task is to answer the user's question based ONLY on the provided context.
 ### Detailed Answer:
 """
-    # Step 6: Generate response from LLM
     try:
         raw_answer = await asyncio.wait_for(
             generate_llm_response(prompt, request.state.request_id),
             timeout=LLM_TIMEOUT_SECONDS
         )
-        # Format answer if pipe separator found
-        if "|" in raw_answer:
-            items = raw_answer.split("|")
             cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
             answer = "\n".join(cleaned_items)
         else:
             answer = raw_answer
     except asyncio.TimeoutError:
         answer = "Sorry, the request took too long to process. Please try again with a simpler question."
-    except Exception:
         answer = "Sorry, an unexpected error occurred while generating a response."
     return {
         "request_id": request.state.request_id,
         "question": query.question,
@@ -252,5 +249,5 @@ async def collect_feedback(feedback: Feedback, request: Request):
         "feedback": feedback.feedback,
         "comment": feedback.comment
     }
-    logger.info(json.dumps(feedback_log))
-    return {"status": "✅ Feedback recorded. Thank you!"}

 import os
 import json
 import asyncio
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
+# Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 # -----------------------------
+# ✅ Logging Configuration
 # -----------------------------
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - [%(request_id)s] - %(message)s')
 class RequestIdAdapter(logging.LoggerAdapter):
     def process(self, msg, kwargs):
 logger = logging.getLogger("app")
 # -----------------------------
+# ✅ Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
 TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
 TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))
 # -----------------------------
+# ✅ Initialize FastAPI App
 # -----------------------------
 app = FastAPI(title="NEEPCO DoP RAG Chatbot", version="2.1.0")
     return response
 # -----------------------------
+# ✅ Vector DB and Data Initialization
 # -----------------------------
+logger.info("Initializing vector DB...")
 try:
     db = PolicyVectorDB(
         persist_directory=DB_PERSIST_DIRECTORY,
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
+        logger.info("Vector DB is populated and ready.")
         db_ready = True
 except Exception as e:
+    logger.error(f"FATAL: Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
     db_ready = False
 # -----------------------------
+# ✅ Load TinyLlama GGUF Model
 # -----------------------------
+logger.info(f"Loading GGUF model from: {MODEL_PATH}")
 try:
     llm = Llama(
         model_path=MODEL_PATH,
         use_mlock=True,
         verbose=False
     )
+    logger.info("GGUF model loaded successfully.")
     model_ready = True
 except Exception as e:
+    logger.error(f"FATAL: Failed to load GGUF model: {e}", exc_info=True)
     llm = None
     model_ready = False
 # -----------------------------
+# ✅ API Schemas
 # -----------------------------
 class Query(BaseModel):
     question: str
     comment: str | None = None
 # -----------------------------
+# ✅ Endpoints
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
     adapter = get_logger_adapter(request)
     question_lower = query.question.strip().lower()
+    # --- GREETING & INTRO HANDLING ---
+    greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
     if question_lower in greeting_keywords:
+        adapter.info(f"Handling a greeting or introductory query: '{query.question}'")
         intro_message = (
             "Hello! I am an AI assistant specifically trained on NEEPCO's Delegation of Powers (DoP) policy document. "
             "My purpose is to help you find accurate information and answer questions based on this specific dataset. "
         }
     if not db_ready or not model_ready:
+        adapter.error("Service unavailable due to initialization failure.")
         raise HTTPException(status_code=503, detail="Service is not ready. Please check logs.")
+    adapter.info(f"Received query: '{query.question}'")
+    # 1. Search Vector DB
     search_results = db.search(query.question, top_k=TOP_K_SEARCH)
     if not search_results:
+        adapter.warning("No relevant context found in vector DB.")
         return {
             "question": query.question,
             "context_used": "No relevant context found.",
             "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
         }
+    scores = [f"{result['relevance_score']:.4f}" for result in search_results]
+    adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
+    # 2. Prepare Context
+    context_chunks = [result['text'] for result in search_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)
+    # 3. Build Prompt with Separator Instruction
     prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
 </s>
 <|user|>
 ### Relevant Context:
 ```
 {context}
 ```
 ### Detailed Answer:
 """
+    # 4. Generate Response
+    answer = "An error occurred while processing your request."
     try:
+        adapter.info("Sending prompt to LLM for generation...")
         raw_answer = await asyncio.wait_for(
             generate_llm_response(prompt, request.state.request_id),
             timeout=LLM_TIMEOUT_SECONDS
         )
+        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
+        # --- POST-PROCESSING LOGIC ---
+        # Check if the model used the pipe separator, indicating a list.
+        if '|' in raw_answer:
+            adapter.info("Pipe separator found. Formatting response as a bulleted list.")
+            # Split the string into a list of items
+            items = raw_answer.split('|')
+            # Clean up each item and format it as a bullet point
             cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
+            # Join them back together with newlines
             answer = "\n".join(cleaned_items)
         else:
+            # If no separator, use the answer as is.
             answer = raw_answer
     except asyncio.TimeoutError:
+        adapter.warning(f"LLM generation timed out after {LLM_TIMEOUT_SECONDS} seconds.")
         answer = "Sorry, the request took too long to process. Please try again with a simpler question."
+    except Exception as e:
+        adapter.error(f"An unexpected error occurred during LLM generation: {e}", exc_info=True)
         answer = "Sorry, an unexpected error occurred while generating a response."
+    adapter.info(f"Final answer prepared. Returning to client.")
     return {
         "request_id": request.state.request_id,
         "question": query.question,
         "feedback": feedback.feedback,
         "comment": feedback.comment
     }
+    adapter.info(json.dumps(feedback_log))
+    return {"status": "✅ Feedback recorded. Thank you!"}