Kalpokoch committed
Commit 7f343ef · verified · 1 Parent(s): 784b064

Update app/app.py

Files changed (1)
  1. app/app.py +184 -33
app/app.py CHANGED
@@ -6,7 +6,9 @@ import uuid
 import re
 from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
+from typing import Optional, Dict
 from llama_cpp import Llama
+
 # Correctly reference the module within the 'app' package
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated
 
@@ -27,10 +29,10 @@ logger = logging.getLogger("app")
 DB_PERSIST_DIRECTORY = os.getenv("DB_PERSIST_DIRECTORY", "/app/vector_database")
 CHUNKS_FILE_PATH = os.getenv("CHUNKS_FILE_PATH", "/app/granular_chunks_final.jsonl")
 MODEL_PATH = os.getenv("MODEL_PATH", "/app/tinyllama_dop_q4_k_m.gguf")
-LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
+LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "90"))
 RELEVANCE_THRESHOLD = float(os.getenv("RELEVANCE_THRESHOLD", "0.3"))
-TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "4"))
-TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "2"))
+TOP_K_SEARCH = int(os.getenv("TOP_K_SEARCH", "3"))
+TOP_K_CONTEXT = int(os.getenv("TOP_K_CONTEXT", "1"))
 
 # -----------------------------
 # ✅ Initialize FastAPI App
@@ -55,12 +57,14 @@ try:
         top_k_default=TOP_K_SEARCH,
         relevance_threshold=RELEVANCE_THRESHOLD
     )
+
     if not ensure_db_populated(db, CHUNKS_FILE_PATH):
         logger.warning("DB not populated on startup. RAG will not function correctly.")
         db_ready = False
     else:
         logger.info("Vector DB is populated and ready.")
        db_ready = True
+
 except Exception as e:
     logger.error(f"FATAL: Failed to initialize Vector DB: {e}", exc_info=True)
     db = None
@@ -92,6 +96,12 @@ except Exception as e:
 class Query(BaseModel):
     question: str
 
+class AdvancedQuery(BaseModel):
+    question: str
+    section_filter: Optional[str] = None
+    chunk_type_filter: Optional[str] = None
+    top_k: Optional[int] = None
+
 class Feedback(BaseModel):
     request_id: str
     question: str
@@ -101,11 +111,83 @@ class Feedback(BaseModel):
     comment: str | None = None
 
 # -----------------------------
-# ✅ Endpoints
+# ✅ Helper Functions
 # -----------------------------
 def get_logger_adapter(request: Request):
     return RequestIdAdapter(logger, {'request_id': getattr(request.state, 'request_id', 'N/A')})
 
+def get_chunk_priority(chunk: Dict) -> int:
+    """Assign priority to different chunk types for better context selection"""
+    priority_order = [
+        'approval_authority',
+        'delegation_summary',
+        'requirement',
+        'method_specific',
+        'board_approval',
+        'financial_concurrence',
+        'composition'
+    ]
+    chunk_type = chunk['metadata'].get('chunk_type', 'unknown')
+    try:
+        return priority_order.index(chunk_type)
+    except ValueError:
+        return len(priority_order)  # Lower priority for unknown types
+
+def detect_filters(question_lower: str) -> tuple:
+    """Detect section and chunk type filters from user question"""
+    section_filter = None
+    chunk_type_filter = None
+
+    # Section keyword mapping
+    section_keywords = {
+        "annexure": "Annexure A",
+        "financial concurrence": "Financial Concurrence",
+        "guidelines": "Guidelines",
+        "section 1": "I", "section i": "I",
+        "section 2": "II", "section ii": "II",
+        "section 3": "III", "section iii": "III",
+        "section 4": "IV", "section iv": "IV"
+    }
+
+    # Chunk type keyword mapping
+    chunk_type_keywords = {
+        "approval": "approval_authority",
+        "delegation": "delegation_summary",
+        "requirement": "requirement",
+        "method": "method_specific",
+        "board": "board_approval",
+        "committee": "composition"
+    }
+
+    # Check for section filters
+    for keyword, section in section_keywords.items():
+        if keyword in question_lower:
+            section_filter = section
+            break
+
+    # Check for chunk type filters
+    for keyword, chunk_type in chunk_type_keywords.items():
+        if keyword in question_lower:
+            chunk_type_filter = chunk_type
+            break
+
+    return section_filter, chunk_type_filter
+
+async def generate_llm_response(prompt: str, request_id: str):
+    loop = asyncio.get_running_loop()
+    response = await loop.run_in_executor(
+        None,
+        lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
+    )
+
+    answer = response["choices"][0]["text"].strip()
+    if not answer:
+        raise ValueError("Empty response from LLM")
+    return answer
+
+# -----------------------------
+# ✅ Endpoints
+# -----------------------------
 @app.get("/")
 async def root():
     return {"status": "✅ Server is running."}
@@ -117,25 +199,16 @@ async def health_check():
         "database_status": "ready" if db_ready else "error",
         "model_status": "ready" if model_ready else "error"
     }
+
    if not db_ready or not model_ready:
         raise HTTPException(status_code=503, detail=status)
     return status
 
-async def generate_llm_response(prompt: str, request_id: str):
-    loop = asyncio.get_running_loop()
-    response = await loop.run_in_executor(
-        None,
-        lambda: llm(prompt, max_tokens=1024, stop=["###", "Question:", "Context:", "</s>"], temperature=0.05, echo=False)
-    )
-    answer = response["choices"][0]["text"].strip()
-    if not answer:
-        raise ValueError("Empty response from LLM")
-    return answer
-
 @app.post("/chat")
 async def chat(query: Query, request: Request):
     adapter = get_logger_adapter(request)
-    question_lower = query.question.strip().lower()
+    question = query.question.strip()
+    question_lower = question.lower()
 
     # --- GREETING & INTRO HANDLING ---
     greeting_keywords = ["hello", "hi", "hey", "what can you do", "who are you"]
@@ -159,8 +232,21 @@ async def chat(query: Query, request: Request):
 
     adapter.info(f"Received query: '{query.question}'")
 
-    # 1. Search Vector DB
-    search_results = db.search(query.question, top_k=TOP_K_SEARCH)
+    # 1. Enhanced Search with potential filtering
+    section_filter, chunk_type_filter = detect_filters(question_lower)
+
+    if section_filter or chunk_type_filter:
+        adapter.info(f"Detected filters - section: '{section_filter}', chunk_type: '{chunk_type_filter}'")
+        search_results = db.search_with_filters(
+            query.question,
+            top_k=TOP_K_SEARCH,
+            section_filter=section_filter,
+            chunk_type_filter=chunk_type_filter
+        )
+        adapter.info(f"Used filtered search")
+    else:
+        search_results = db.search(query.question, top_k=TOP_K_SEARCH)
+        adapter.info(f"Used regular search")
 
     if not search_results:
         adapter.warning("No relevant context found in vector DB.")
@@ -169,36 +255,69 @@ Your task is to answer the user's question based ONLY on the provided context.
             "context_used": "No relevant context found.",
             "answer": "Sorry, I could not find a relevant policy to answer that question. Please try rephrasing."
         }
-
+
+    # 2. Enhanced logging of retrieved chunks
+    chunk_types = [result['metadata'].get('chunk_type', 'unknown') for result in search_results]
+    sections = [result['metadata'].get('section', 'unknown') for result in search_results]
     scores = [f"{result['relevance_score']:.4f}" for result in search_results]
-    adapter.info(f"Found {len(search_results)} relevant chunks with scores: {scores}")
+
+    adapter.info(f"Found {len(search_results)} relevant chunks")
+    adapter.info(f"Chunk types: {chunk_types}")
+    adapter.info(f"Sections: {sections}")
+    adapter.info(f"Relevance scores: {scores}")
 
-    # 2. Prepare Context
-    context_chunks = [result['text'] for result in search_results[:TOP_K_CONTEXT]]
+    # 3. Prioritize chunk types for better context selection
+    prioritized_results = sorted(search_results, key=lambda x: (get_chunk_priority(x), -x['relevance_score']))
+
+    # Log prioritization results
+    prioritized_types = [result['metadata'].get('chunk_type', 'unknown') for result in prioritized_results]
+    adapter.info(f"Prioritized chunk types order: {prioritized_types}")
+
+    # 4. Prepare Context using prioritized results
+    context_chunks = [result['text'] for result in prioritized_results[:TOP_K_CONTEXT]]
     context = "\n---\n".join(context_chunks)
+
+    # 5. Enhanced context logging
+    context_metadata = []
+    for result in prioritized_results[:TOP_K_CONTEXT]:
+        metadata = result['metadata']
+        context_info = {
+            'section': metadata.get('section', 'unknown'),
+            'clause': metadata.get('clause', 'unknown'),
+            'chunk_type': metadata.get('chunk_type', 'unknown'),
+            'score': f"{result['relevance_score']:.4f}"
+        }
+        context_metadata.append(context_info)
 
-    # 3. Build Prompt with Separator Instruction
+    adapter.info(f"Selected context metadata: {context_metadata}")
+
+    # 6. Build Prompt
     prompt = f"""<|system|>
 You are a precise and factual assistant for NEEPCO's Delegation of Powers (DoP) policy.
 Your task is to answer the user's question based ONLY on the provided context.
 
 - **Formatting Rule:** If the answer contains a list of items or steps, you **MUST** separate each item with a pipe symbol (`|`). For example: `First item|Second item|Third item`.
+
 - **Content Rule:** If the information is not in the provided context, you **MUST** reply with the exact phrase: "The provided policy context does not contain information on this topic."
-</s>
+
 <|user|>
+
 ### Relevant Context:
-```
+
 {context}
 ```
 
 ### Question:
+
 {query.question}
-</s>
+
 <|assistant|>
+
 ### Detailed Answer:
+
 """
 
-    # 4. Generate Response
+    # 7. Generate Response
     answer = "An error occurred while processing your request."
     try:
         adapter.info("Sending prompt to LLM for generation...")
@@ -206,20 +325,16 @@ Your task is to answer the user's question based ONLY on the provided context.
             generate_llm_response(prompt, request.state.request_id),
             timeout=LLM_TIMEOUT_SECONDS
         )
-        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
 
+        adapter.info(f"LLM generation successful. Raw response: {raw_answer[:250]}...")
+
         # --- POST-PROCESSING LOGIC ---
-        # Check if the model used the pipe separator, indicating a list.
         if '|' in raw_answer:
             adapter.info("Pipe separator found. Formatting response as a bulleted list.")
-            # Split the string into a list of items
             items = raw_answer.split('|')
-            # Clean up each item and format it as a bullet point
             cleaned_items = [f"* {item.strip()}" for item in items if item.strip()]
-            # Join them back together with newlines
            answer = "\n".join(cleaned_items)
         else:
-            # If no separator, use the answer as is.
             answer = raw_answer
 
     except asyncio.TimeoutError:
@@ -230,6 +345,7 @@ Your task is to answer the user's question based ONLY on the provided context.
         answer = "Sorry, an unexpected error occurred while generating a response."
 
     adapter.info(f"Final answer prepared. Returning to client.")
+
     return {
         "request_id": request.state.request_id,
        "question": query.question,
@@ -237,6 +353,40 @@ Your task is to answer the user's question based ONLY on the provided context.
         "answer": answer
     }
 
+@app.post("/advanced_search")
+async def advanced_search(query: AdvancedQuery, request: Request):
+    """Advanced search endpoint with explicit filters"""
+    adapter = get_logger_adapter(request)
+
+    if not db_ready:
+        raise HTTPException(status_code=503, detail="Database not ready")
+
+    adapter.info(f"Advanced search: question='{query.question}', section='{query.section_filter}', chunk_type='{query.chunk_type_filter}'")
+
+    search_results = db.search_with_filters(
+        query.question,
+        top_k=query.top_k or TOP_K_SEARCH,
+        section_filter=query.section_filter,
+        chunk_type_filter=query.chunk_type_filter
+    )
+
+    return {
+        "request_id": request.state.request_id,
+        "query": query.question,
+        "filters": {
+            "section": query.section_filter,
+            "chunk_type": query.chunk_type_filter
+        },
+        "results": [
+            {
+                "text": result['text'],
+                "metadata": result['metadata'],
+                "relevance_score": result['relevance_score']
+            }
+            for result in search_results
+        ]
+    }
+
 @app.post("/feedback")
 async def collect_feedback(feedback: Feedback, request: Request):
     adapter = get_logger_adapter(request)
@@ -249,5 +399,6 @@ async def collect_feedback(feedback: Feedback, request: Request):
         "feedback": feedback.feedback,
         "comment": feedback.comment
     }
+
     adapter.info(json.dumps(feedback_log))
     return {"status": "✅ Feedback recorded. Thank you!"}
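For reference, here is a minimal client sketch against the two endpoints this commit touches (`/chat` and the new `/advanced_search`). It is not part of the commit: the base URL, the use of the `requests` package, the sample question, and the filter values are illustrative assumptions; only the request and response field names come from the code above.

```python
# Minimal client sketch (not part of this commit). Assumes the app is served
# at http://localhost:8000 and that the `requests` package is installed;
# the sample question and filter values are illustrative only.
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port

# 1) Regular RAG query: /chat infers section/chunk-type filters from keywords.
chat_resp = requests.post(
    f"{BASE_URL}/chat",
    json={"question": "What is the approval authority for open tenders under Annexure A?"},
    timeout=120,
)
chat_resp.raise_for_status()
print(chat_resp.json()["answer"])

# 2) Explicit filters via the new /advanced_search endpoint (AdvancedQuery model).
search_resp = requests.post(
    f"{BASE_URL}/advanced_search",
    json={
        "question": "approval authority for open tenders",
        "section_filter": "Annexure A",             # optional
        "chunk_type_filter": "approval_authority",  # optional
        "top_k": 3,                                 # optional, defaults to TOP_K_SEARCH
    },
    timeout=60,
)
search_resp.raise_for_status()
for result in search_resp.json()["results"]:
    print(result["relevance_score"], result["metadata"].get("chunk_type"), result["text"][:80])
```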