Shreekant Kalwar (Nokia) committed
Commit · 28b14ff
Parent(s): 28d4382
major changess

Files changed:
- app.py +38 -5
- embedding_model_instance.py +15 -0
- llm.py +19 -0
- mongo_instance.py +6 -0
- qdrant_instance.py +13 -0
- requirements.txt +0 -0
- util.py +358 -34
- util_backup.py +387 -0
- util_backup_29_09_2025.py +413 -0
app.py
CHANGED

@@ -3,7 +3,12 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from fastapi.middleware.cors import CORSMiddleware
 from bot_instance import gemini_bot, llama_bot  # singleton ErrorBot instance
-from typing import List, Optional
+from typing import List, Optional, Any
+
+import os
+from dotenv import load_dotenv
+from util import ErrorBot
+

 app = FastAPI(title="ErrorBot API")

@@ -24,6 +29,7 @@ class MessageItem(BaseModel):
 class ChatRequest(BaseModel):
     message: str
     history: Optional[List[MessageItem]] = []  # optional conversation history
+    lastContext: List[Any] = None

 # ---------------- Endpoints ----------------
 @app.get("/")

@@ -46,15 +52,42 @@ def root():

     # return {"reply": answer}

+load_dotenv()
+GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
+
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+
+EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"

 @app.post("/gemini/chat")
 def gemini_chat(request: ChatRequest):
     history_list = [{"role": msg.role, "content": msg.content} for msg in request.history]
-
-
+    gemini_bot = ErrorBot(
+        embedding_model_name=EMBEDDING_MODEL,
+        llm_model_name="gemini-2.5-flash",
+        google_api_key=GOOGLE_API_KEY,
+        llm_provider="gemini",
+        last_context=request.lastContext
+    )
+    print("In App.py")
+    print(request.lastContext)
+    answer, last_context = gemini_bot.ask(request.message, history=history_list)
+    print(answer)
+    print(last_context)
+    return {"reply": answer, "last_context": last_context}

 @app.post("/llama/chat")
 def llama_chat(request: ChatRequest):
     history_list = [{"role": msg.role, "content": msg.content} for msg in request.history]
-
-
+    llama_bot = ErrorBot(
+        embedding_model_name=EMBEDDING_MODEL,
+        llm_model_name="llama-3.3-70b-versatile",
+        groq_api_key=GROQ_API_KEY,
+        llm_provider="groq",
+        last_context=request.lastContext
+
+    )
+    answer, last_context = llama_bot.ask(request.message, history=history_list)
+    print(answer)
+    print(last_context)
+    return {"reply": answer, "last_context": last_context}
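Both handlers now construct a fresh ErrorBot per request and thread the client-supplied lastContext through to it, returning the updated last_context alongside the reply. For reference, a minimal client round-trip might look like the following sketch; the host/port, question text, and the PR id are illustrative assumptions, not part of the commit:

import requests

BASE = "http://localhost:8000"  # assumed host/port for a local `uvicorn app:app` run

# First turn: no history, no previous context.
r1 = requests.post(f"{BASE}/gemini/chat", json={
    "message": "Why does PR-123 fail?",  # PR-123 is a hypothetical id
    "history": [],
    "lastContext": None,
}).json()

# Follow-up turn: echo the returned last_context back as lastContext so the
# server-side ErrorBot can reuse the previous retrieval instead of re-querying Qdrant.
r2 = requests.post(f"{BASE}/gemini/chat", json={
    "message": "How do I apply that fix?",
    "history": [
        {"role": "user", "content": "Why does PR-123 fail?"},
        {"role": "assistant", "content": r1["reply"]},
    ],
    "lastContext": r1["last_context"],
}).json()
print(r2["reply"])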
embedding_model_instance.py
ADDED

@@ -0,0 +1,15 @@
+import torch
+from sentence_transformers import SentenceTransformer, CrossEncoder
+
+
+# --- Embedding model
+
+
+EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+embedding_model = SentenceTransformer(EMBEDDING_MODEL, device=device)
+embedding_dim = embedding_model.get_sentence_embedding_dimension()
+
+reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
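This module pins the retrieve-then-rerank pair as process-wide singletons: a bi-encoder for fast vector search and a cross-encoder for slower, more accurate scoring of query/document pairs. A small usage sketch of the two (the query and candidate strings are made-up examples):

from embedding_model_instance import embedding_model, reranker

query = "disk full error on node restart"
candidates = [
    "ProblemReport: node restart fails when /var is full",
    "Correction: rotate logs daily",
]

query_vec = embedding_model.encode(query)  # 768-dim vector for bge-base-en-v1.5
scores = reranker.predict([(query, c) for c in candidates])  # higher = more relevant
print(candidates[int(scores.argmax())])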
llm.py
ADDED

@@ -0,0 +1,19 @@
+
+import google.generativeai as genai
+from groq import Groq
+import os
+from dotenv import load_dotenv
+
+
+load_dotenv()
+GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
+
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+
+genai.configure(api_key=GOOGLE_API_KEY)
+
+
+gemini = genai.GenerativeModel("gemini-2.5-flash")
+
+
+groq = Groq(api_key=GROQ_API_KEY)
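The two clients deliberately expose different call shapes, which is why util.py branches on llm_provider everywhere: Gemini takes a single prompt string, Groq takes OpenAI-style chat messages. A sketch of both call shapes (assumes the API keys are present in the environment; the prompt text is illustrative):

from llm import gemini, groq

# Gemini: prompt string in, .text out.
print(gemini.generate_content("Say hello in one word.").text)

# Groq: chat messages in, choices[0].message.content out.
resp = groq.chat.completions.create(
    model="llama-3.3-70b-versatile",
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(resp.choices[0].message.content)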
mongo_instance.py
ADDED

@@ -0,0 +1,6 @@
+from pymongo import MongoClient
+
+# Connect to MongoDB
+client = MongoClient("mongodb+srv://dhaval:Dhaval15@cluster0.rwu1ze6.mongodb.net/prontoDB?retryWrites=true&w=majority&appName=Cluster0")  # replace with your URI
+db = client["prontoDB"]
+
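The connection string above hardcodes credentials in source. A variant worth considering, mirroring how qdrant_instance.py and llm.py already read their settings, would pull the URI from the environment instead; MONGO_URI is an assumed variable name, not one used elsewhere in the commit:

import os
from dotenv import load_dotenv
from pymongo import MongoClient

load_dotenv()
client = MongoClient(os.getenv("MONGO_URI"))  # MONGO_URI is an assumed env var name
db = client["prontoDB"]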
qdrant_instance.py
ADDED

@@ -0,0 +1,13 @@
+from qdrant_client import QdrantClient, models
+import os
+from dotenv import load_dotenv
+
+
+load_dotenv()
+
+
+print("Connecting to Qdrant...")
+qdrant = QdrantClient(
+    url=os.getenv("QDRANT_URL"),
+    api_key=os.getenv("QDRANT_API_KEY"),
+)
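A quick sanity-check sketch against the shared client above (the collection name comes from util.py; the zero vector is a dummy placeholder, so the scores it returns are meaningless):

from qdrant_instance import qdrant

print(qdrant.collection_exists("technical_errors"))  # True once the collection is set up
hits = qdrant.query_points(
    collection_name="technical_errors",
    query=[0.0] * 768,  # bge-base-en-v1.5 embeddings are 768-dimensional
    limit=3,
    with_payload=True,
).points
for h in hits:
    print(h.payload.get("id"), h.payload.get("entity_type"), h.score)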
requirements.txt
CHANGED

Binary files a/requirements.txt and b/requirements.txt differ
util.py
CHANGED

@@ -8,6 +8,13 @@ from typing import List, Dict
 import google.generativeai as genai
 from groq import Groq

+from embedding_model_instance import embedding_model, embedding_dim, reranker
+from qdrant_instance import qdrant
+from llm import gemini, groq
+from mongo_instance import db
+import json
+from bson import ObjectId
+
 def build_content(doc: dict, entity_type: str) -> str:
     """Convert MongoDB document into natural text for embeddings."""
     parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]

@@ -28,40 +35,42 @@ def build_content(doc: dict, entity_type: str) -> str:
 class ErrorBot:
     """Chatbot using RAG (Qdrant + Gemini API)."""

-    def __init__(self, embedding_model_name: str, llm_model_name: str, google_api_key: str = None, groq_api_key: str = None, llm_provider: str = "gemini"):
+    def __init__(self, embedding_model_name: str, llm_model_name: str, google_api_key: str = None, groq_api_key: str = None, llm_provider: str = "gemini", last_context: list = None):
         print("🚀 Initializing ErrorBot...")
+        self.last_context = last_context

+        print("last_context", last_context)
         # --- Embedding model
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        print(f"Using device: {self.device}")
-        self.embedding_model = SentenceTransformer(embedding_model_name, device=self.device)
-        self.embedding_dim = self.embedding_model.get_sentence_embedding_dimension()
+        # self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        self.embedding_model = embedding_model
+        self.embedding_dim = embedding_dim

+        self.db = db
         # --- Qdrant client
-        print("Connecting to Qdrant...")
-        self.qdrant = QdrantClient(
-            url=os.getenv("QDRANT_URL"),
-            api_key=os.getenv("QDRANT_API_KEY"),
-        )
+
+        self.qdrant = qdrant
         self.collection_name = "technical_errors"
-        self._setup_collection()
+        #self._setup_collection()

         # --- LLM setup
         self.llm_provider = llm_provider.lower()
         self.llm_model_name = llm_model_name

         if self.llm_provider == "gemini":
-            genai.configure(api_key=google_api_key)
-            self.llm = genai.GenerativeModel(llm_model_name)
+
+            self.llm = gemini

         elif self.llm_provider == "groq":
-            self.llm = Groq(api_key=groq_api_key)
+
+            self.llm = groq

         else:
             raise ValueError(f"Unsupported LLM provider: {self.llm_provider}")

         # --- Cross encoder reranker
-        self.reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
+
+        self.reranker = reranker
         print(f"✅ ErrorBot ready with {self.llm_provider.upper()}")

     def _setup_collection(self):

@@ -147,20 +156,108 @@

         return candidates[:top_k]

-    def generate_answer(self, query: str, context: List[Dict], history: list = None):
-        context_str = "\n---\n".join(
-            [f"{c['entity_type']} (Score: {c['score']:.2f}):\n{c['content']}" for c in context]
-        )
+    def generate_answer(self, query: str, context: List[Dict], history: list = None, is_followup: bool = False):
+        """
+        Generates an answer using the LLM, guiding it to identify which context is useful.
+        """
+        context_str = ""
+
+        if(is_followup):
+            pass
+
+            # Aggregation pipeline
+            # pipeline = [
+            #     # Start with problemReports
+            #     {"$match": {"_id": {"$in": self.last_context}}},
+
+            #     # Add faultAnalysis
+            #     {"$unionWith": {
+            #         "coll": "faultanalysis",
+            #         "pipeline": [{"$match": {"id": {"$in": self.last_context}}}]
+            #     }},
+
+            #     # Add corrections
+            #     {"$unionWith": {
+            #         "coll": "corrections",
+            #         "pipeline": [{"$match": {"id": {"$in": self.last_context}}}]
+            #     }}
+            # ]
+
+            pipeline = [
+                # Start with problemReports
+                {
+                    "$match": {"_id": {"$in": self.last_context}}
+                },
+                {
+                    "$addFields": {"entity_type": "ProblemReport"}
+                },
+
+                # Add faultAnalysis
+                {
+                    "$unionWith": {
+                        "coll": "faultanalysis",
+                        "pipeline": [
+                            {"$match": {"id": {"$in": self.last_context}}},
+                            {"$addFields": {"entity_type": "FaultAnalysis"}}
+                        ]
+                    }
+                },

+                # Add corrections
+                {
+                    "$unionWith": {
+                        "coll": "corrections",
+                        "pipeline": [
+                            {"$match": {"id": {"$in": self.last_context}}},
+                            {"$addFields": {"entity_type": "Correction"}}
+                        ]
+                    }
+                }
+            ]
+
+            # Run aggregation on problemReports
+            context_docs = list(db.problemReports.aggregate(pipeline))
+            # Serialize full documents as text for LLM
+            #print(context_docs)
+            context_str = "\n---\n".join(
+                [f"{c['entity_type']} (ID: {c['_id']}):\n{json.dumps(c, default=str)}"
+                 for c in context_docs]
+            )
+            print("Context String in Follow Up:")
+            #print(context_str)
+
+
+        else:
+
+            context_str = "\n---\n".join(
+                [f"{c['entity_type']} (Score: {c['score']:.2f}):\n{c['content']}" for c in context]
+            )

         # --- System prompt
-        system_prompt = f"""
-        You are a technical assistant. You have access to Problem Reports (PR), Fault Analyses (FA), and Corrections (CR).
-        Use the provided context and conversation history to answer the question clearly and concisely.
-        If context is not relevant, say you do not have enough information.
-
-        ### Context
-        {context_str}
-        """
+        # system_prompt = f"""
+        # You are a technical assistant. You have access to Problem Reports (PR), Fault Analyses (FA), and Corrections (CR).
+        # Use the provided context and conversation history to answer the question clearly and concisely.
+        # If context is not relevant, say you do not have enough information.
+
+        # ### Context
+        # {context_str}
+        # """

+        system_prompt = f"""
+        You are a technical assistant. A user may ask questions about Problem Reports (PR), Fault Analyses (FA), and Corrections (CR).
+        Your task is to:
+        1. Identify which information (PR, FA, CR) is relevant to answering the user's question.
+        2. Explain the solution in simple, clear, actionable language.
+        3. Do not just repeat the content; summarize and explain.
+
+        ### User Question:
+
+
+        ### Context:
+        {context_str}
+
+        Provide a concise, step-by-step explanation if applicable.
+        """

         # --- Conversation history in list-of-dicts format
         convo = []

@@ -187,20 +284,247 @@
             messages=[{"role": "system", "content": system_prompt}] + convo
         )
         return completion.choices[0].message.content.strip()

+    def fetch_problem_report_with_links(self, pr_id: str):
+
+        # --- Fetch Problem Report
+        pr_doc = db["problemReports"].find_one({"id": pr_id})
+        if not pr_doc:
+            return None, [], [], [], []
+
+        if "_id" in pr_doc and isinstance(pr_doc["_id"], ObjectId):
+            pr_doc["_id"] = str(pr_doc["_id"])
+
+        # --- Extract linked IDs
+        cr_ids = pr_doc.get("correctionIds", [])
+        fa_ids = pr_doc.get("faultAnalysisId", [])
+
+        # ensure both are lists
+        if isinstance(cr_ids, str):
+            cr_ids = [cr_ids]
+        elif cr_ids is None:
+            cr_ids = []
+
+        if isinstance(fa_ids, str):
+            fa_ids = [fa_ids]
+        elif fa_ids is None:
+            fa_ids = []
+
+        # --- Fetch Correction Reports
+        cr_docs = list(db["corrections"].find({"id": {"$in": cr_ids}})) if cr_ids else []
+        for doc in cr_docs:
+            if "_id" in doc and isinstance(doc["_id"], ObjectId):
+                doc["_id"] = str(doc["_id"])
+
+        # --- Fetch Fault Analysis Reports
+        fa_docs = list(db["faultanalysis"].find({"id": {"$in": fa_ids}})) if fa_ids else []
+        for doc in fa_docs:
+            if "_id" in doc and isinstance(doc["_id"], ObjectId):
+                doc["_id"] = str(doc["_id"])
+
+        return pr_doc, cr_ids, fa_ids, cr_docs, fa_docs
+
+
+    def is_technical_query(self, query: str) -> bool:
+        """
+        Classify query as TECHNICAL or NON-TECHNICAL.
+        """
+        classification_prompt = f"""
+        You are a classifier. Determine if the following query is TECHNICAL
+        (related to software, debugging, errors, troubleshooting, fault analysis,
+        corrections, technical problem reports) or NON-TECHNICAL
+        (general questions, greetings, chit-chat, unrelated topics).
+
+        Query: "{query}"
+
+        Respond with exactly one word: "TECHNICAL" or "NON-TECHNICAL".
+        """
+
+        if self.llm_provider == "gemini":
+            response = self.llm.generate_content(classification_prompt)
+            result = response.text.strip().upper()
+
+        elif self.llm_provider == "groq":
+            completion = self.llm.chat.completions.create(
+                model=self.llm_model_name,
+                messages=[{"role": "system", "content": classification_prompt}]
+            )
+            result = completion.choices[0].message.content.strip().upper()
+
+        return result == "TECHNICAL"
+
+    def is_followup_query(self, query: str, history: list = None) -> bool:
+        """
+        Detect if query is a follow-up based on conversation history.
+        """
+        if not history:
+            return False
+
+        classification_prompt = f"""
+        You are a classifier. Determine if the following user query
+        is a FOLLOW-UP (depends on the previous conversation)
+        or a NEW QUERY (can be answered independently).
+
+        Previous conversation:
+        { [msg['content'] for msg in history][-3:] }
+
+        Current query: "{query}"
+
+        Respond with exactly one word: "FOLLOW-UP" or "NEW".
+        """
+
+        if self.llm_provider == "gemini":
+            response = self.llm.generate_content(classification_prompt)
+            result = response.text.strip().upper()
+
+        elif self.llm_provider == "groq":
+            completion = self.llm.chat.completions.create(
+                model=self.llm_model_name,
+                messages=[{"role": "system", "content": classification_prompt}]
+            )
+            result = completion.choices[0].message.content.strip().upper()
+        print("Follow up: ", result)
+        return result == "FOLLOW-UP"

     def ask(self, query: str, history: list = None):
         print(f"\n❓ Query: {query}")
-        retrieved_context = self.retrieve(query)

+        # Step 1: Classify
+        is_technical = self.is_technical_query(query)
+        is_followup = self.is_followup_query(query, history)
+
+        # Step 2: Non-technical standalone
+        #if not is_technical:
+        if not is_technical and not is_followup:
+            print("⚠️ Non-technical standalone query → skipping Qdrant.")
+            system_prompt = "You are a helpful assistant. Answer clearly and concisely."
+            convo = [{"role": "system", "content": system_prompt},
+                     {"role": "user", "content": query}]
+
+            if self.llm_provider == "gemini":
+                convo_str = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in convo])
+                response = self.llm.generate_content(convo_str)
+                return response.text.strip(), []
+
+            elif self.llm_provider == "groq":
+                completion = self.llm.chat.completions.create(
+                    model=self.llm_model_name,
+                    messages=convo
+                )
+                return completion.choices[0].message.content.strip(), []
+
+        # Step 3: Technical or follow-up
+        print("is_followup", is_followup)
+        print("last_context", self.last_context)
+        print("is_technical", is_technical)
+
+        if is_followup and self.last_context:
+            if not is_technical:
+                print("⚠️ Non-technical followup → skipping Qdrant.")
+                system_prompt = "You are a helpful assistant. Answer clearly and concisely."
+                convo = [{"role": "system", "content": system_prompt},
+                         {"role": "user", "content": query}]
+
+                if self.llm_provider == "gemini":
+                    convo_str = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in convo])
+                    response = self.llm.generate_content(convo_str)
+                    return response.text.strip(), []
+
+                elif self.llm_provider == "groq":
+                    completion = self.llm.chat.completions.create(
+                        model=self.llm_model_name,
+                        messages=convo
+                    )
+                    return completion.choices[0].message.content.strip(), []
+            else:
+                print("🔄 Follow-up query → reusing previous context.")
+                retrieved_context = self.last_context
+                context_docs = retrieved_context
+
+        else:
+            print("📥 New technical query → retrieving from Qdrant.")
+            retrieved_context = self.retrieve(query)
+            last_context = []
+            for i, doc in enumerate(retrieved_context):
+                last_context.append(doc['id'])
+                print(f" - Context {i+1} ({doc['entity_type']}, ID: {doc['id']}, Score: {doc['score']:.2f})")
+
+            first_doc = retrieved_context[0]
+            context_docs = []
+
+            # Step 2: Determine starting point based on entity type
+            pr_docs_to_use = []
+
+            if first_doc["entity_type"] == "ProblemReport":
+                pr_id = first_doc["id"]
+                print(f"🔗 Using PR from context1: {pr_id}")
+                pr_doc, cr_ids, fa_ids, cr_docs, fa_docs = self.fetch_problem_report_with_links(pr_id)
+                pr_docs_to_use.append((pr_doc, cr_docs, fa_docs))
+
+            elif first_doc["entity_type"] == "Correction":
+                cr_id = first_doc["id"]
+                print(f"🔗 Using CR from context1: {cr_id}")
+                cr_doc = self.db["corrections"].find_one({"id": cr_id})
+                pr_ids = cr_doc.get("problemReportIds", []) if cr_doc else []
+
+                if isinstance(pr_ids, str):
+                    pr_ids = [pr_ids]
+                for pr_id in pr_ids:
+                    pr_doc, cr_ids, fa_ids, cr_docs, fa_docs = self.fetch_problem_report_with_links(pr_id)
+                    pr_docs_to_use.append((pr_doc, cr_docs, fa_docs))
+
+            elif first_doc["entity_type"] == "FaultAnalysis":
+                fa_id = first_doc["id"]
+                print(f"🔗 Using FA from context1: {fa_id}")
+                fa_doc = self.db["faultanalysis"].find_one({"id": fa_id})
+                pr_ids = fa_doc.get("problemReportIds", []) if fa_doc else []
+
+                if isinstance(pr_ids, str):
+                    pr_ids = [pr_ids]
+                for pr_id in pr_ids:
+                    pr_doc, cr_ids, fa_ids, cr_docs, fa_docs = self.fetch_problem_report_with_links(pr_id)
+                    pr_docs_to_use.append((pr_doc, cr_docs, fa_docs))
+
+            # Step 3: Build context documents for LLM, prioritize CR and FA
+            for pr_doc, cr_docs, fa_docs in pr_docs_to_use:
+                # Include FA first (analysis of problem)
+                for fa in fa_docs:
+                    context_docs.append({
+                        "entity_type": "FaultAnalysis",
+                        "content": build_content(fa, "FaultAnalysis"),
+                        "score": 1.0
+                    })
+                # Include CR next (solutions/corrections)
+                for cr in cr_docs:
+                    context_docs.append({
+                        "entity_type": "Correction",
+                        "content": build_content(cr, "Correction"),
+                        "score": 1.0
+                    })
+                # PR last (problem description)
+                if pr_doc:
+                    context_docs.append({
+                        "entity_type": "ProblemReport",
+                        "content": build_content(pr_doc, "ProblemReport"),
+                        "score": 0.9
+                    })
+
+            print(f"✅ Total documents for LLM context: {len(context_docs)}")
+
+            if(len(last_context)>0):
+                self.last_context = context_docs  # save for future follow-ups
         if not retrieved_context:
             print("💬 No relevant context found.")
-            return "I could not find any relevant information."
+            return "I could not find any relevant information.", []

-        print(f"✅ Retrieved {len(retrieved_context)} documents.")
-        for i, doc in enumerate(retrieved_context):
-            print(f" - Context {i+1} ({doc['entity_type']}, ID: {doc['id']}, Score: {doc['score']:.2f})")
-
-        answer = self.generate_answer(query, retrieved_context, history)
+        print(f"✅ Using {len(retrieved_context)} documents as context.")
+        #answer = self.generate_answer(query, retrieved_context, history, is_followup)
+
+        answer = self.generate_answer(query, context_docs, history, is_followup)
+        last_context = self.last_context
         print(f"\n🤖 Answer: {answer}")
-        return answer
+        return (answer, last_context)
+
+
+
+
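The follow-up branch of generate_answer() fans a single ID list out across three collections with $unionWith and tags each document's origin via $addFields. Note that the first $match compares _id while the union stages compare id, so it is worth verifying which field the stored last_context values actually correspond to. A standalone sketch of the same pipeline shape (the IDs below are made up):

from mongo_instance import db

last_context = ["PR-101", "FA-7", "CR-42"]  # illustrative ids
pipeline = [
    {"$match": {"_id": {"$in": last_context}}},
    {"$addFields": {"entity_type": "ProblemReport"}},
    {"$unionWith": {"coll": "faultanalysis", "pipeline": [
        {"$match": {"id": {"$in": last_context}}},
        {"$addFields": {"entity_type": "FaultAnalysis"}},
    ]}},
    {"$unionWith": {"coll": "corrections", "pipeline": [
        {"$match": {"id": {"$in": last_context}}},
        {"$addFields": {"entity_type": "Correction"}},
    ]}},
]
for doc in db.problemReports.aggregate(pipeline):
    print(doc["entity_type"], doc.get("id", doc["_id"]))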
util_backup.py
ADDED

@@ -0,0 +1,387 @@
+import os
+import torch
+from qdrant_client import QdrantClient, models
+from sentence_transformers import SentenceTransformer, CrossEncoder
+from pymongo import MongoClient
+from bson import ObjectId
+from typing import List, Dict
+import google.generativeai as genai
+from groq import Groq
+
+def build_content(doc: dict, entity_type: str) -> str:
+    """Convert MongoDB document into natural text for embeddings."""
+    parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
+    for k, v in doc.items():
+        if k in ["_id"]:  # skip ObjectId
+            continue
+        if isinstance(v, list):
+            parts.append(f"{k}: {', '.join(map(str, v))}")
+        elif isinstance(v, dict):
+            nested = "; ".join([f"{nk}: {nv}" for nk, nv in v.items() if nv])
+            parts.append(f"{k}: {nested}")
+        else:
+            if v:
+                parts.append(f"{k}: {v}")
+    return "\n".join(parts)
+
+
+class ErrorBot:
+    """Chatbot using RAG (Qdrant + Gemini API)."""
+
+    def __init__(self, embedding_model_name: str, llm_model_name: str, google_api_key: str = None, groq_api_key: str = None, llm_provider: str = "gemini"):
+        print("🚀 Initializing ErrorBot...")
+        self.last_context = None
+        # --- Embedding model
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Using device: {self.device}")
+        self.embedding_model = SentenceTransformer(embedding_model_name, device=self.device)
+        self.embedding_dim = self.embedding_model.get_sentence_embedding_dimension()
+
+        # --- Qdrant client
+        print("Connecting to Qdrant...")
+        self.qdrant = QdrantClient(
+            url=os.getenv("QDRANT_URL"),
+            api_key=os.getenv("QDRANT_API_KEY"),
+        )
+        self.collection_name = "technical_errors"
+        self._setup_collection()
+
+        # --- LLM setup
+        self.llm_provider = llm_provider.lower()
+        self.llm_model_name = llm_model_name
+
+        if self.llm_provider == "gemini":
+            genai.configure(api_key=google_api_key)
+            self.llm = genai.GenerativeModel(llm_model_name)
+
+        elif self.llm_provider == "groq":
+            self.llm = Groq(api_key=groq_api_key)
+
+        else:
+            raise ValueError(f"Unsupported LLM provider: {self.llm_provider}")
+
+        # --- Cross encoder reranker
+        self.reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
+        print(f"✅ ErrorBot ready with {self.llm_provider.upper()}")
+
+    def _setup_collection(self):
+        if not self.qdrant.collection_exists(self.collection_name):
+            self.qdrant.create_collection(
+                collection_name=self.collection_name,
+                vectors_config=models.VectorParams(
+                    size=self.embedding_dim,
+                    distance=models.Distance.COSINE,
+                ),
+            )
+
+    def ingest_from_mongodb(self, mongo_uri: str, db_name: str, batch_size: int = 32):
+        client = MongoClient(mongo_uri)
+        db = client[db_name]
+
+        collections = {
+            "ProblemReport": db["problemReports"],
+            "FaultAnalysis": db["faultanalysis"],
+            "Correction": db["corrections"],
+        }
+
+        docs = []
+        for entity_type, coll in collections.items():
+            for doc in coll.find():
+                if "_id" in doc and isinstance(doc["_id"], ObjectId):
+                    doc["_id"] = str(doc["_id"])
+                docs.append({"entity_type": entity_type, "data": doc})
+
+        contents = [build_content(d["data"], d["entity_type"]) for d in docs]
+
+        all_embeddings = []
+        for i in range(0, len(contents), batch_size):
+            batch_contents = contents[i:i + batch_size]
+            embeddings = self.embedding_model.encode(batch_contents, show_progress_bar=True).tolist()
+            all_embeddings.extend(embeddings)
+
+        self.qdrant.upsert(
+            collection_name=self.collection_name,
+            points=[
+                models.PointStruct(
+                    id=i,
+                    vector=emb,
+                    payload={
+                        "id": d["data"].get("id", str(d["data"].get("_id", i))),
+                        "entity_type": d["entity_type"],
+                        "raw": d["data"],
+                        "content": c,
+                    },
+                )
+                for i, (d, emb, c) in enumerate(zip(docs, all_embeddings, contents))
+            ],
+            wait=True,
+        )
+        print(f"✅ Ingested {len(docs)} documents into '{self.collection_name}'")
+
+    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.3, rerank: bool = True):
+        query_embedding = self.embedding_model.encode(query).tolist()
+        hits = self.qdrant.query_points(
+            collection_name=self.collection_name,
+            query=query_embedding,
+            limit=top_k * 3 if rerank else top_k,
+            with_payload=True,
+            score_threshold=score_threshold,
+        ).points
+
+        candidates = [
+            {
+                "id": hit.payload.get("id"),
+                "entity_type": hit.payload.get("entity_type", ""),
+                "content": hit.payload.get("content", ""),
+                "score": hit.score,
+            }
+            for hit in hits
+        ]
+
+        if rerank and candidates:
+            pairs = [(query, c["content"]) for c in candidates]
+            scores = self.reranker.predict(pairs)
+            for i, score in enumerate(scores):
+                candidates[i]["rerank_score"] = float(score)
+            candidates = sorted(candidates, key=lambda x: x["rerank_score"], reverse=True)
+
+        return candidates[:top_k]
+
+    def generate_answer(self, query: str, context: List[Dict], history: list = None):
+        context_str = "\n---\n".join(
+            [f"{c['entity_type']} (Score: {c['score']:.2f}):\n{c['content']}" for c in context]
+        )
+
+        # --- System prompt
+        system_prompt = f"""
+        You are a technical assistant. You have access to Problem Reports (PR), Fault Analyses (FA), and Corrections (CR).
+        Use the provided context and conversation history to answer the question clearly and concisely.
+        If context is not relevant, say you do not have enough information.
+
+        ### Context
+        {context_str}
+        """
+
+        # --- Conversation history in list-of-dicts format
+        convo = []
+        if history:
+            for msg in history:
+                convo.append({
+                    "role": "user" if msg["role"] == "user" else "assistant",
+                    "content": msg["content"],
+                })
+
+        convo.append({"role": "user", "content": query})
+
+        # --- Gemini flow
+        if self.llm_provider == "gemini":
+            convo_str = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in convo])
+            prompt = system_prompt + "\n\n" + convo_str + "\nAssistant:"
+            response = self.llm.generate_content(prompt)
+            return response.text.strip()
+
+        # --- Groq flow
+        elif self.llm_provider == "groq":
+            completion = self.llm.chat.completions.create(
+                model=self.llm_model_name,
+                messages=[{"role": "system", "content": system_prompt}] + convo
+            )
+            return completion.choices[0].message.content.strip()
+
+
+    # def ask(self, query: str, history: list = None):
+    #     print(f"\n❓ Query: {query}")
+    #     retrieved_context = self.retrieve(query)
+
+    #     if not retrieved_context:
+    #         print("💬 No relevant context found.")
+    #         return "I could not find any relevant information."
+
+    #     print(f"✅ Retrieved {len(retrieved_context)} documents.")
+    #     for i, doc in enumerate(retrieved_context):
+    #         print(f" - Context {i+1} ({doc['entity_type']}, ID: {doc['id']}, Score: {doc['score']:.2f})")
+
+    #     answer = self.generate_answer(query, retrieved_context, history)
+    #     print(f"\n🤖 Answer: {answer}")
+    #     return answer
+
+    # def is_technical_query(self, query: str) -> bool:
+    #     """
+    #     Ask the LLM to classify whether a query is technical or not.
+    #     Returns True if technical, False otherwise.
+    #     """
+    #     classification_prompt = f"""
+    #     You are a classifier. Determine if the following query is TECHNICAL
+    #     (related to software, debugging, errors, troubleshooting, fault analysis,
+    #     corrections, technical problem reports) or NON-TECHNICAL
+    #     (general questions, greetings, chit-chat, unrelated topics).
+
+    #     Query: "{query}"
+
+    #     Respond with exactly one word: "TECHNICAL" or "NON-TECHNICAL".
+    #     """
+
+    #     if self.llm_provider == "gemini":
+    #         response = self.llm.generate_content(classification_prompt)
+    #         result = response.text.strip().upper()
+
+    #     elif self.llm_provider == "groq":
+    #         completion = self.llm.chat.completions.create(
+    #             model=self.llm_model_name,
+    #             messages=[{"role": "system", "content": classification_prompt}]
+    #         )
+    #         result = completion.choices[0].message.content.strip().upper()
+
+    #     else:
+    #         raise ValueError(f"Unsupported LLM provider: {self.llm_provider}")
+
+    #     return result == "TECHNICAL"
+
+
+    # def ask(self, query: str, history: list = None):
+    #     print(f"\n❓ Query: {query}")
+
+    #     # --- Step 1: Check if query is technical
+    #     if not self.is_technical_query(query):
+    #         print("⚠️ Non-technical query detected → skipping Qdrant.")
+
+    #         # Minimal system prompt for non-technical queries
+    #         system_prompt = "You are a helpful assistant. Answer clearly and concisely."
+    #         convo = [{"role": "system", "content": system_prompt},
+    #                  {"role": "user", "content": query}]
+
+    #         if self.llm_provider == "gemini":
+    #             convo_str = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in convo])
+    #             response = self.llm.generate_content(convo_str)
+    #             return response.text.strip()
+
+    #         elif self.llm_provider == "groq":
+    #             completion = self.llm.chat.completions.create(
+    #                 model=self.llm_model_name,
+    #                 messages=convo
+    #             )
+    #             return completion.choices[0].message.content.strip()
+
+    #     # --- Step 2: If technical, go through retrieval
+    #     retrieved_context = self.retrieve(query)
+
+    #     if not retrieved_context:
+    #         print("💬 No relevant context found.")
+    #         return "I could not find any relevant information."
+
+    #     print(f"✅ Retrieved {len(retrieved_context)} documents.")
+    #     for i, doc in enumerate(retrieved_context):
+    #         print(f" - Context {i+1} ({doc['entity_type']}, ID: {doc['id']}, Score: {doc['score']:.2f})")
+
+    #     answer = self.generate_answer(query, retrieved_context, history)
+    #     print(f"\n🤖 Answer: {answer}")
+    #     return answer
+
+    def is_technical_query(self, query: str) -> bool:
+        """
+        Classify query as TECHNICAL or NON-TECHNICAL.
+        """
+        classification_prompt = f"""
+        You are a classifier. Determine if the following query is TECHNICAL
+        (related to software, debugging, errors, troubleshooting, fault analysis,
+        corrections, technical problem reports) or NON-TECHNICAL
+        (general questions, greetings, chit-chat, unrelated topics).
+
+        Query: "{query}"
+
+        Respond with exactly one word: "TECHNICAL" or "NON-TECHNICAL".
+        """
+
+        if self.llm_provider == "gemini":
+            response = self.llm.generate_content(classification_prompt)
+            result = response.text.strip().upper()
+
+        elif self.llm_provider == "groq":
+            completion = self.llm.chat.completions.create(
+                model=self.llm_model_name,
+                messages=[{"role": "system", "content": classification_prompt}]
+            )
+            result = completion.choices[0].message.content.strip().upper()
+
+        return result == "TECHNICAL"
+
+    def is_followup_query(self, query: str, history: list = None) -> bool:
+        """
+        Detect if query is a follow-up based on conversation history.
+        """
+        if not history:
+            return False
+
+        classification_prompt = f"""
+        You are a classifier. Determine if the following user query
+        is a FOLLOW-UP (depends on the previous conversation)
+        or a NEW QUERY (can be answered independently).
+
+        Previous conversation:
+        { [msg['content'] for msg in history][-3:] }
+
+        Current query: "{query}"
+
+        Respond with exactly one word: "FOLLOW-UP" or "NEW".
+        """
+
+        if self.llm_provider == "gemini":
+            response = self.llm.generate_content(classification_prompt)
+            result = response.text.strip().upper()
+
+        elif self.llm_provider == "groq":
+            completion = self.llm.chat.completions.create(
+                model=self.llm_model_name,
+                messages=[{"role": "system", "content": classification_prompt}]
+            )
+            result = completion.choices[0].message.content.strip().upper()
+
+        return result == "FOLLOW-UP"
+
+    def ask(self, query: str, history: list = None):
+        print(f"\n❓ Query: {query}")
+
+        # Step 1: Classify
+        is_technical = self.is_technical_query(query)
+        is_followup = self.is_followup_query(query, history)
+
+        # Step 2: Non-technical standalone
+        if not is_technical and not is_followup:
+            print("⚠️ Non-technical standalone query → skipping Qdrant.")
+            system_prompt = "You are a helpful assistant. Answer clearly and concisely."
+            convo = [{"role": "system", "content": system_prompt},
+                     {"role": "user", "content": query}]
+
+            if self.llm_provider == "gemini":
+                convo_str = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in convo])
+                response = self.llm.generate_content(convo_str)
+                return response.text.strip()
+
+            elif self.llm_provider == "groq":
+                completion = self.llm.chat.completions.create(
+                    model=self.llm_model_name,
+                    messages=convo
+                )
+                return completion.choices[0].message.content.strip()
+
+        # Step 3: Technical or follow-up
+        if is_followup and self.last_context:
+            print("🔄 Follow-up query → reusing previous context.")
+            retrieved_context = self.last_context
+        else:
+            print("📥 New technical query → retrieving from Qdrant.")
+            retrieved_context = self.retrieve(query)
+            self.last_context = retrieved_context  # save for future follow-ups
+
+        if not retrieved_context:
+            print("💬 No relevant context found.")
+            return "I could not find any relevant information."
+
+        print(f"✅ Using {len(retrieved_context)} documents as context.")
+        answer = self.generate_answer(query, retrieved_context, history)
+        print(f"\n🤖 Answer: {answer}")
+        return answer
+
+
+
+
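This backup preserves the pre-refactor ErrorBot, which still owns its own SentenceTransformer, Qdrant client, and collection setup, and whose ask() returns a bare string rather than an (answer, last_context) tuple. A usage sketch of the backup class, assuming QDRANT_URL/QDRANT_API_KEY are set in the environment and with placeholder values for everything else:

from util_backup import ErrorBot

bot = ErrorBot(
    embedding_model_name="BAAI/bge-base-en-v1.5",
    llm_model_name="gemini-2.5-flash",
    google_api_key="YOUR_GOOGLE_API_KEY",  # placeholder
    llm_provider="gemini",
)
bot.ingest_from_mongodb("mongodb://localhost:27017", "prontoDB")  # illustrative URI
print(bot.retrieve("node restart fails", top_k=3))
print(bot.ask("How was PR-101 fixed?"))  # PR-101 is a made-up id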
util_backup_29_09_2025.py
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
from qdrant_client import QdrantClient, models
|
| 4 |
+
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 5 |
+
from pymongo import MongoClient
|
| 6 |
+
from bson import ObjectId
|
| 7 |
+
from typing import List, Dict
|
| 8 |
+
import google.generativeai as genai
|
| 9 |
+
from groq import Groq
|
| 10 |
+
|
| 11 |
+
from embedding_model_instance import embedding_model, embedding_dim, reranker
|
| 12 |
+
from qdrant_instance import qdrant
|
| 13 |
+
from llm import gemini, groq
|
| 14 |
+
from mongo_instance import db
|
| 15 |
+
import json
|
| 16 |
+
from bson import ObjectId
|
| 17 |
+
|
| 18 |
+
def build_content(doc: dict, entity_type: str) -> str:
|
| 19 |
+
"""Convert MongoDB document into natural text for embeddings."""
|
| 20 |
+
parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
|
| 21 |
+
for k, v in doc.items():
|
| 22 |
+
if k in ["_id"]: # skip ObjectId
|
| 23 |
+
continue
|
| 24 |
+
if isinstance(v, list):
|
| 25 |
+
parts.append(f"{k}: {', '.join(map(str, v))}")
|
| 26 |
+
elif isinstance(v, dict):
|
| 27 |
+
nested = "; ".join([f"{nk}: {nv}" for nk, nv in v.items() if nv])
|
| 28 |
+
parts.append(f"{k}: {nested}")
|
| 29 |
+
else:
|
| 30 |
+
if v:
|
| 31 |
+
parts.append(f"{k}: {v}")
|
| 32 |
+
return "\n".join(parts)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class ErrorBot:
|
| 36 |
+
"""Chatbot using RAG (Qdrant + Gemini API)."""
|
| 37 |
+
|
| 38 |
+
def __init__(self, embedding_model_name: str, llm_model_name: str, google_api_key: str = None, groq_api_key: str = None, llm_provider: str = "gemini", last_context: list = None):
|
| 39 |
+
print("π Initializing ErrorBot...")
|
| 40 |
+
self.last_context = last_context
|
| 41 |
+
|
| 42 |
+
print("last_context", last_context)
|
| 43 |
+
# --- Embedding model
|
| 44 |
+
# self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 45 |
+
|
| 46 |
+
self.embedding_model = embedding_model
|
| 47 |
+
self.embedding_dim = embedding_dim
|
| 48 |
+
|
| 49 |
+
self.db = db
|
| 50 |
+
# --- Qdrant client
|
| 51 |
+
|
| 52 |
+
self.qdrant = qdrant
|
| 53 |
+
self.collection_name = "technical_errors"
|
| 54 |
+
#self._setup_collection()
|
| 55 |
+
|
| 56 |
+
# --- LLM setup
|
| 57 |
+
self.llm_provider = llm_provider.lower()
|
| 58 |
+
self.llm_model_name = llm_model_name
|
| 59 |
+
|
| 60 |
+
if self.llm_provider == "gemini":
|
| 61 |
+
|
| 62 |
+
self.llm = gemini
|
| 63 |
+
|
| 64 |
+
elif self.llm_provider == "groq":
|
| 65 |
+
|
| 66 |
+
self.llm = groq
|
| 67 |
+
|
| 68 |
+
else:
|
| 69 |
+
raise ValueError(f"Unsupported LLM provider: {self.llm_provider}")
|
| 70 |
+
|
| 71 |
+
# --- Cross encoder reranker
|
| 72 |
+
|
| 73 |
+
self.reranker = reranker
|
| 74 |
+
print(f"β
ErrorBot ready with {self.llm_provider.upper()}")
|
| 75 |
+
|
| 76 |
+
def _setup_collection(self):
|
| 77 |
+
if not self.qdrant.collection_exists(self.collection_name):
|
| 78 |
+
self.qdrant.create_collection(
|
| 79 |
+
collection_name=self.collection_name,
|
| 80 |
+
vectors_config=models.VectorParams(
|
| 81 |
+
size=self.embedding_dim,
|
| 82 |
+
distance=models.Distance.COSINE,
|
| 83 |
+
),
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
def ingest_from_mongodb(self, mongo_uri: str, db_name: str, batch_size: int = 32):
|
| 87 |
+
client = MongoClient(mongo_uri)
|
| 88 |
+
db = client[db_name]
|
| 89 |
+
|
| 90 |
+
collections = {
|
| 91 |
+
"ProblemReport": db["problemReports"],
|
| 92 |
+
"FaultAnalysis": db["faultanalysis"],
|
| 93 |
+
"Correction": db["corrections"],
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
docs = []
|
| 97 |
+
for entity_type, coll in collections.items():
|
| 98 |
+
for doc in coll.find():
|
| 99 |
+
if "_id" in doc and isinstance(doc["_id"], ObjectId):
|
| 100 |
+
doc["_id"] = str(doc["_id"])
|
| 101 |
+
docs.append({"entity_type": entity_type, "data": doc})
|
| 102 |
+
|
| 103 |
+
contents = [build_content(d["data"], d["entity_type"]) for d in docs]
|
| 104 |
+
|
| 105 |
+
all_embeddings = []
|
| 106 |
+
for i in range(0, len(contents), batch_size):
|
| 107 |
+
batch_contents = contents[i:i + batch_size]
|
| 108 |
+
embeddings = self.embedding_model.encode(batch_contents, show_progress_bar=True).tolist()
|
| 109 |
+
all_embeddings.extend(embeddings)
|
| 110 |
+
|
| 111 |
+
self.qdrant.upsert(
|
| 112 |
+
collection_name=self.collection_name,
|
| 113 |
+
points=[
|
| 114 |
+
models.PointStruct(
|
| 115 |
+
id=i,
|
| 116 |
+
vector=emb,
|
| 117 |
+
payload={
|
| 118 |
+
"id": d["data"].get("id", str(d["data"].get("_id", i))),
|
| 119 |
+
"entity_type": d["entity_type"],
|
| 120 |
+
"raw": d["data"],
|
| 121 |
+
"content": c,
|
| 122 |
+
},
|
| 123 |
+
)
|
| 124 |
+
for i, (d, emb, c) in enumerate(zip(docs, all_embeddings, contents))
|
| 125 |
+
],
|
| 126 |
+
wait=True,
|
| 127 |
+
)
|
| 128 |
+
print(f"β
Ingested {len(docs)} documents into '{self.collection_name}'")
|
| 129 |
+
|
| 130 |
+
def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.3, rerank: bool = True):
|
| 131 |
+
query_embedding = self.embedding_model.encode(query).tolist()
|
| 132 |
+
hits = self.qdrant.query_points(
|
| 133 |
+
collection_name=self.collection_name,
|
| 134 |
+
query=query_embedding,
|
| 135 |
+
limit=top_k * 3 if rerank else top_k,
|
| 136 |
+
with_payload=True,
|
| 137 |
+
score_threshold=score_threshold,
|
| 138 |
+
).points
|
| 139 |
+
|
| 140 |
+
candidates = [
|
| 141 |
+
{
|
| 142 |
+
"id": hit.payload.get("id"),
|
| 143 |
+
"entity_type": hit.payload.get("entity_type", ""),
|
| 144 |
+
"content": hit.payload.get("content", ""),
|
| 145 |
+
"score": hit.score,
|
| 146 |
+
}
|
| 147 |
+
for hit in hits
|
| 148 |
+
]
|
| 149 |
+
|
| 150 |
+
if rerank and candidates:
|
| 151 |
+
pairs = [(query, c["content"]) for c in candidates]
|
| 152 |
+
scores = self.reranker.predict(pairs)
|
| 153 |
+
for i, score in enumerate(scores):
|
| 154 |
+
candidates[i]["rerank_score"] = float(score)
|
| 155 |
+
candidates = sorted(candidates, key=lambda x: x["rerank_score"], reverse=True)
|
| 156 |
+
|
| 157 |
+
return candidates[:top_k]
|
| 158 |
+
|
| 159 |
+
    def generate_answer(self, query: str, context: List[Dict], history: list = None, is_followup: bool = False):
        """
        Generates an answer using the LLM, guiding it to identify which context is useful.
        """
        context_str = ""

        if is_followup:
            # Aggregation pipeline: gather the previously used documents from
            # all three collections in a single round trip.
            pipeline = [
                # Start with problemReports
                {"$match": {"_id": {"$in": self.last_context}}},

                # Add faultAnalysis
                {"$unionWith": {
                    "coll": "faultanalysis",
                    "pipeline": [{"$match": {"id": {"$in": self.last_context}}}]
                }},

                # Add corrections
                {"$unionWith": {
                    "coll": "corrections",
                    "pipeline": [{"$match": {"id": {"$in": self.last_context}}}]
                }}
            ]

            # Run aggregation on problemReports
            context_docs = list(db.problemReports.aggregate(pipeline))

            # Serialize full documents as text for the LLM
            context_str = "\n---\n".join(
                [f"{c.get('entity_type', 'Unknown')} (ID: {c['_id']}):\n{json.dumps(c, default=str)}"
                 for c in context_docs]
            )
            print("Context String in Follow Up:")
            # print(context_str)

        else:
            context_str = "\n---\n".join(
                [f"{c['entity_type']} (Score: {c['score']:.2f}):\n{c['content']}" for c in context]
            )

        # --- System prompt
        # system_prompt = f"""
        # You are a technical assistant. You have access to Problem Reports (PR), Fault Analyses (FA), and Corrections (CR).
        # Use the provided context and conversation history to answer the question clearly and concisely.
        # If context is not relevant, say you do not have enough information.

        # ### Context
        # {context_str}
        # """

        system_prompt = f"""
You are a technical assistant. A user may ask questions about Problem Reports (PR), Fault Analyses (FA), and Corrections (CR).
Your task is to:
1. Identify which information (PR, FA, CR) is relevant to answering the user's question.
2. Explain the solution in simple, clear, actionable language.
3. Do not just repeat the content; summarize and explain.

### User Question:
{query}

### Context:
{context_str}

Provide a concise, step-by-step explanation if applicable.
"""

        # --- Conversation history in list-of-dicts format
        convo = []
        if history:
            for msg in history:
                convo.append({
                    "role": "user" if msg["role"] == "user" else "assistant",
                    "content": msg["content"],
                })

        convo.append({"role": "user", "content": query})

        # --- Gemini flow
        if self.llm_provider == "gemini":
            convo_str = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in convo])
            prompt = system_prompt + "\n\n" + convo_str + "\nAssistant:"
            response = self.llm.generate_content(prompt)
            return response.text.strip()

        # --- Groq flow
        elif self.llm_provider == "groq":
            completion = self.llm.chat.completions.create(
                model=self.llm_model_name,
                messages=[{"role": "system", "content": system_prompt}] + convo
            )
            return completion.choices[0].message.content.strip()

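The $unionWith pipeline above fetches the remembered IDs from all three collections in one aggregation round trip. A standalone sketch of the same pattern (the connection URI, database name "errordb", and sample IDs are illustrative):

from pymongo import MongoClient

db = MongoClient("mongodb://localhost:27017")["errordb"]  # assumed URI/DB for the sketch
last_context = ["PR-1", "FA-1", "CR-1"]  # IDs remembered from the previous turn

pipeline = [
    {"$match": {"_id": {"$in": last_context}}},
    {"$unionWith": {"coll": "faultanalysis",
                    "pipeline": [{"$match": {"id": {"$in": last_context}}}]}},
    {"$unionWith": {"coll": "corrections",
                    "pipeline": [{"$match": {"id": {"$in": last_context}}}]}},
]
docs = list(db.problemReports.aggregate(pipeline))  # PRs, FAs and CRs in one result set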
    def fetch_problem_report_with_links(self, pr_id: str):
        # --- Fetch Problem Report
        pr_doc = db["problemReports"].find_one({"id": pr_id})
        if not pr_doc:
            return None, [], [], [], []

        if "_id" in pr_doc and isinstance(pr_doc["_id"], ObjectId):
            pr_doc["_id"] = str(pr_doc["_id"])

        # --- Extract linked IDs
        cr_ids = pr_doc.get("correctionIds", [])
        fa_ids = pr_doc.get("faultAnalysisId", [])

        # Ensure both are lists
        if isinstance(cr_ids, str):
            cr_ids = [cr_ids]
        elif cr_ids is None:
            cr_ids = []

        if isinstance(fa_ids, str):
            fa_ids = [fa_ids]
        elif fa_ids is None:
            fa_ids = []

        # --- Fetch Correction Reports
        cr_docs = list(db["corrections"].find({"id": {"$in": cr_ids}})) if cr_ids else []
        for doc in cr_docs:
            if "_id" in doc and isinstance(doc["_id"], ObjectId):
                doc["_id"] = str(doc["_id"])

        # --- Fetch Fault Analysis Reports
        fa_docs = list(db["faultanalysis"].find({"id": {"$in": fa_ids}})) if fa_ids else []
        for doc in fa_docs:
            if "_id" in doc and isinstance(doc["_id"], ObjectId):
                doc["_id"] = str(doc["_id"])

        return pr_doc, cr_ids, fa_ids, cr_docs, fa_docs

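A quick usage sketch of fetch_problem_report_with_links (the PR id and the bot instance name are illustrative):

pr, cr_ids, fa_ids, cr_docs, fa_docs = bot.fetch_problem_report_with_links("PR-1234")
if pr is None:
    print("No such problem report")
else:
    print(f"PR {pr['id']} links {len(fa_docs)} fault analyses and {len(cr_docs)} corrections")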
    def is_technical_query(self, query: str) -> bool:
        """
        Classify query as TECHNICAL or NON-TECHNICAL.
        """
        classification_prompt = f"""
You are a classifier. Determine if the following query is TECHNICAL
(related to software, debugging, errors, troubleshooting, fault analysis,
corrections, technical problem reports) or NON-TECHNICAL
(general questions, greetings, chit-chat, unrelated topics).

Query: "{query}"

Respond with exactly one word: "TECHNICAL" or "NON-TECHNICAL".
"""

        if self.llm_provider == "gemini":
            response = self.llm.generate_content(classification_prompt)
            result = response.text.strip().upper()

        elif self.llm_provider == "groq":
            completion = self.llm.chat.completions.create(
                model=self.llm_model_name,
                messages=[{"role": "system", "content": classification_prompt}]
            )
            result = completion.choices[0].message.content.strip().upper()

        return result == "TECHNICAL"

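Because LLM classifiers sometimes wrap the label in quotes, punctuation, or a short sentence, the exact comparison result == "TECHNICAL" can misfire. A more forgiving parse is one option (a hedged sketch, not part of the commit):

def parse_binary_label(raw: str, positive: str = "TECHNICAL") -> bool:
    # Tolerate quotes, trailing periods, or a short explanation around the label.
    cleaned = raw.strip().strip('"\'.').upper()
    # startswith() rejects "NON-TECHNICAL"; the split() check catches the label
    # embedded in a longer sentence such as "THIS IS TECHNICAL".
    return cleaned.startswith(positive) or positive in cleaned.split()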
    def is_followup_query(self, query: str, history: list = None) -> bool:
        """
        Detect if query is a follow-up based on conversation history.
        """
        if not history:
            return False

        classification_prompt = f"""
You are a classifier. Determine if the following user query
is a FOLLOW-UP (depends on the previous conversation)
or a NEW QUERY (can be answered independently).

Previous conversation:
{[msg['content'] for msg in history][-3:]}

Current query: "{query}"

Respond with exactly one word: "FOLLOW-UP" or "NEW".
"""

        if self.llm_provider == "gemini":
            response = self.llm.generate_content(classification_prompt)
            result = response.text.strip().upper()

        elif self.llm_provider == "groq":
            completion = self.llm.chat.completions.create(
                model=self.llm_model_name,
                messages=[{"role": "system", "content": classification_prompt}]
            )
            result = completion.choices[0].message.content.strip().upper()

        print("Follow up: ", result)
        return result == "FOLLOW-UP"

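Running an extra LLM round trip per message just for follow-up detection adds latency; a cheap lexical pre-check is one possible shortcut before falling back to the classifier (a sketch, not in the commit; the marker list is a guess):

FOLLOWUP_MARKERS = ("it", "that", "this", "those", "the same", "what about", "and the")

def looks_like_followup(query: str) -> bool:
    # Very short queries and queries opening with an anaphoric word usually
    # depend on prior turns; defer to the LLM classifier for everything else.
    q = query.strip().lower()
    return len(q.split()) <= 4 or q.startswith(FOLLOWUP_MARKERS)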
    def ask(self, query: str, history: list = None):
        print(f"\n❓ Query: {query}")

        # Step 1: Classify
        is_technical = self.is_technical_query(query)
        is_followup = self.is_followup_query(query, history)

        # Step 2: Non-technical standalone
        if not is_technical and not is_followup:
            print("⚠️ Non-technical standalone query → skipping Qdrant.")
            system_prompt = "You are a helpful assistant. Answer clearly and concisely."
            convo = [{"role": "system", "content": system_prompt},
                     {"role": "user", "content": query}]

            if self.llm_provider == "gemini":
                convo_str = "\n".join([f"{m['role'].capitalize()}: {m['content']}" for m in convo])
                response = self.llm.generate_content(convo_str)
                return response.text.strip(), []

            elif self.llm_provider == "groq":
                completion = self.llm.chat.completions.create(
                    model=self.llm_model_name,
                    messages=convo
                )
                return completion.choices[0].message.content.strip(), []

        # Step 3: Technical or follow-up
        print("is_followup", is_followup)
        print("last_context", self.last_context)
        if is_followup and self.last_context:
            print("🔄 Follow-up query → reusing previous context.")
            retrieved_context = self.last_context
        else:
            print("📥 New technical query → retrieving from Qdrant.")
            retrieved_context = self.retrieve(query)
            last_context = []
            for i, doc in enumerate(retrieved_context):
                last_context.append(doc['id'])
                print(f" - Context {i+1} ({doc['entity_type']}, ID: {doc['id']}, Score: {doc['score']:.2f})")

            # Save inside this branch only: on the follow-up path above,
            # last_context is never assigned, so checking it there would raise.
            if last_context:
                self.last_context = last_context  # save for future follow-ups

        if not retrieved_context:
            print("💬 No relevant context found.")
            return "I could not find any relevant information.", []

        print(f"✅ Using {len(retrieved_context)} documents as context.")
        answer = self.generate_answer(query, retrieved_context, history, is_followup)
        last_context = self.last_context
        print(f"\n🤖 Answer: {answer}")
        return answer, last_context
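End to end, the (answer, last_context) tuple is what lets a stateless HTTP client carry retrieval state between turns: the caller echoes last_context back on the next request. A hedged usage sketch, assuming an already-constructed ErrorBot named bot:

history = []

# First turn: technical, so Qdrant retrieval runs and the result IDs are saved.
answer, last_context = bot.ask("Why does service X crash on startup?")
history += [{"role": "user", "content": "Why does service X crash on startup?"},
            {"role": "assistant", "content": answer}]

# Second turn: classified as a follow-up, so the saved IDs are re-fetched
# from MongoDB via the $unionWith pipeline instead of re-querying Qdrant.
answer, last_context = bot.ask("What was the correction for that?", history=history)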