Spaces:

skkalwar
/

LLM_Model

Sleeping

App Files Files Community

shreekantkalwar committed on Oct 22

Commit

b3e9a96

1 Parent(s): e7d16b7

ensemble

Browse files

Files changed (9) hide show

Dockerfile +1 -1
app.py +79 -15
embedding_model_instance.py +16 -3
mongo_instance.py +10 -2
qdrant_instance.py +11 -4
requirements.txt +0 -0
requirements_backup.txt +78 -0
tempCodeRunnerFile.py +1 -0
util.py +124 -83

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.13
 WORKDIR /app


1	+ FROM python:3.14
2
3	WORKDIR /app
4

app.py CHANGED Viewed

@@ -2,13 +2,17 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
 from fastapi.middleware.cors import CORSMiddleware
-from bot_instance import gemini_bot, llama_bot  # singleton ErrorBot instance
 from typing import List, Optional,Any
 import os
 from dotenv import load_dotenv
 from util import ErrorBot
 app = FastAPI(title="ErrorBot API")
@@ -53,41 +57,101 @@ def root():
 #     return {"reply": answer}
 load_dotenv()
-GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
-GROQ_API_KEY = os.getenv("GROQ_API_KEY")
-EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
 @app.post("/gemini/chat")
 def gemini_chat(request: ChatRequest):
     history_list = [{"role": msg.role, "content": msg.content} for msg in request.history]
     gemini_bot = ErrorBot(
-        embedding_model_name=EMBEDDING_MODEL,
         llm_model_name="gemini-2.5-flash",
-        google_api_key=GOOGLE_API_KEY,
         llm_provider="gemini",
         last_context = request.lastContext
     )
-    print("In App.py")
-    print(request.lastContext)
     answer, last_context = gemini_bot.ask(request.message, history=history_list)
-    print(answer)
-    print(last_context)
     return {"reply": answer, "last_context": last_context}
 @app.post("/llama/chat")
 def llama_chat(request: ChatRequest):
     history_list = [{"role": msg.role, "content": msg.content} for msg in request.history]
     llama_bot = ErrorBot(
-        embedding_model_name=EMBEDDING_MODEL,
         llm_model_name="llama-3.3-70b-versatile",
-        groq_api_key=GROQ_API_KEY,
         llm_provider="groq",
         last_context = request.lastContext
     )
     answer, last_context = llama_bot.ask(request.message, history=history_list)
-    print(answer)
-    print(last_context)
-    return {"reply": answer, "last_context": last_context}

 from fastapi import FastAPI
 from pydantic import BaseModel
 from fastapi.middleware.cors import CORSMiddleware
+#from bot_instance import gemini_bot, llama_bot  # singleton ErrorBot instance
 from typing import List, Optional,Any
 import os
 from dotenv import load_dotenv
 from util import ErrorBot
+# from mongo_to_qdrant_ingestor import MongoToQdrantIngestor
+# from qdrant_instance import qdrant
+# from embedding_model_instance import embedding_model
+# from json_to_qdrant_ingestor import JsonToQdrantIngestor
 app = FastAPI(title="ErrorBot API")
 #     return {"reply": answer}
 load_dotenv()
+#GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
+#GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+#EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
+#EMBEDDING_MODEL = "all-MiniLM-L6-v2"
+#EMBEDDING_MODEL = "BAAI/bge-m3"
 @app.post("/gemini/chat")
 def gemini_chat(request: ChatRequest):
+    """Answer one chat turn with a Gemini-backed ErrorBot.
+
+    The request history is flattened into plain role/content dicts, a fresh
+    ErrorBot is created for this request (seeded with the client's last
+    context), and the bot's reply is returned with the updated context.
+    """
+    prior_turns = []
+    for turn in request.history:
+        prior_turns.append({"role": turn.role, "content": turn.content})
+    bot = ErrorBot(
+        llm_model_name="gemini-2.5-flash",
+        llm_provider="gemini",
+        last_context=request.lastContext,
+    )
+    reply, updated_context = bot.ask(request.message, history=prior_turns)
+    return {"reply": reply, "last_context": updated_context}
 @app.post("/llama/chat")
 def llama_chat(request: ChatRequest):
+    """Answer one chat turn with a Groq-hosted Llama ErrorBot.
+
+    The request history is flattened into plain role/content dicts, a fresh
+    ErrorBot is created for this request (seeded with the client's last
+    context), and the bot's reply is returned with the updated context.
+    """
+    prior_turns = []
+    for turn in request.history:
+        prior_turns.append({"role": turn.role, "content": turn.content})
+    bot = ErrorBot(
+        llm_model_name="llama-3.3-70b-versatile",
+        llm_provider="groq",
+        last_context=request.lastContext,
+    )
+    reply, updated_context = bot.ask(request.message, history=prior_turns)
+    return {"reply": reply, "last_context": updated_context}
+# @app.post("/ingest/mongodb")
+# def ingest_mongodb():
+#     """
+#     Ingest documents from MongoDB into the bot's knowledge base.
+#     """
+#     ingestor = MongoToQdrantIngestor(qdrant, embedding_model, collection_name="technical_errors")
+#     # def build_content(doc, entity_type):
+#     #     """Simple example function to build textual content."""
+#     #     return f"{entity_type}: {doc.get('title', '')} {doc.get('description', '')}"
+#     def build_content(doc: dict, entity_type: str) -> str:
+#         """Convert MongoDB document into natural text for embeddings."""
+#         parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
+#         for k, v in doc.items():
+#             if k in ["_id"]:  # skip ObjectId
+#                 continue
+#             if isinstance(v, list):
+#                 parts.append(f"{k}: {', '.join(map(str, v))}")
+#             elif isinstance(v, dict):
+#                 nested = "; ".join([f"{nk}: {nv}" for nk, nv in v.items() if nv])
+#                 parts.append(f"{k}: {nested}")
+#             else:
+#                 if v:
+#                     parts.append(f"{k}: {v}")
+#         return "\n".join(parts)
+#     ingestor.ingest_from_mongodb(
+#         build_content_fn=build_content,
+#         batch_size=500,
+#     )
+#     return {"status": "ingestion started"}
+# @app.post("/ingest/json")
+# def ingest_json():
+#     """
+#     Ingest documents from JSON into the bot's knowledge base.
+#     """
+#     json_sources = {
+#         "ProblemReport": "./json/problemReports.json",
+#         "Correction": "./json/corrections.json",
+#         "FaultAnalysis": "./json/faultanalysis.json",
+#     }
+#     ingestor = JsonToQdrantIngestor(qdrant, embedding_model, collection_name="technical_errors")
+#     def build_content(doc, entity_type):
+#         """Simple example function to build textual content."""
+#         return f"{entity_type}: {doc.get('title', '')} {doc.get('description', '')}"
+#     ingestor.ingest_from_json(
+#         json_sources,
+#         build_content_fn=build_content,
+#         batch_size=500,
+#     )
+#     return {"status": "ingestion started"}

embedding_model_instance.py CHANGED Viewed

@@ -5,11 +5,24 @@ from sentence_transformers import SentenceTransformer, CrossEncoder
 # --- Embedding model
-EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
-embedding_model = SentenceTransformer(EMBEDDING_MODEL, device=device)
-embedding_dim = embedding_model.get_sentence_embedding_dimension()
 reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

 # --- Embedding model
+# Two embedding models are loaded side by side for the ensemble retriever.
+EMBEDDING_MODEL_M3 = "BAAI/bge-m3"
+EMBEDDING_MODEL_LARGE = "BAAI/bge-large-en-v1.5"
+#EMBEDDING_MODEL = "all-MiniLM-L6-v2"
+# Print GPU details only when CUDA is present: torch.cuda.get_device_name()
+# and torch.cuda.current_device() raise on a CPU-only host, which would abort
+# this import before the CPU fallback below is ever reached.
+print("CUDA available:", torch.cuda.is_available())
+if torch.cuda.is_available():
+    print(torch.cuda.get_device_name(0))
+    print("Current device:", torch.cuda.current_device())
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
+embedding_model_m3 = SentenceTransformer(EMBEDDING_MODEL_M3, device=device)
+embedding_model_large = SentenceTransformer(EMBEDDING_MODEL_LARGE, device=device)
+# Vector sizes reported by each model.
+embedding_dim_m3 = embedding_model_m3.get_sentence_embedding_dimension()
+embedding_dim_large = embedding_model_large.get_sentence_embedding_dimension()
 reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

mongo_instance.py CHANGED Viewed

@@ -1,6 +1,14 @@
 from pymongo import MongoClient
 # Connect to MongoDB
-client = MongoClient("mongodb+srv://dhaval:Dhaval15@cluster0.rwu1ze6.mongodb.net/prontoDB?retryWrites=true&w=majority&appName=Cluster0")  # replace with your URI
-db = client["prontoDB"]

 from pymongo import MongoClient
+import os
+from dotenv import load_dotenv
+load_dotenv()
 # Connect to MongoDB
+# Connection settings are read from the environment (.env is loaded above).
+mongo_uri = os.getenv("MONGO_URI")
+mongo_db_name = os.getenv("MONGO_DB_NAME")
+if not mongo_db_name:
+    # Without this check client[None] fails with an opaque TypeError.
+    raise RuntimeError("MONGO_DB_NAME environment variable is not set")
+client = MongoClient(mongo_uri)
+db = client[mongo_db_name]
+# NOTE(review): MongoClient appears to connect lazily, so this message does
+# not prove the server is reachable — confirm against the PyMongo docs.
+print("MongoDB Connected")

qdrant_instance.py CHANGED Viewed

@@ -7,7 +7,14 @@ load_dotenv()
 print("Connecting to Qdrant...")
-qdrant = QdrantClient(
-            url=os.getenv("QDRANT_URL"),
-            api_key=os.getenv("QDRANT_API_KEY"),
-        )

 print("Connecting to Qdrant...")
+# One Qdrant client per embedding model; each reads its own URL and API key
+# from the environment.
+qdrant_m3 = QdrantClient(url=os.getenv("QDRANT_URL_M3"), api_key=os.getenv("QDRANT_API_KEY_M3"))
+qdrant_large = QdrantClient(url=os.getenv("QDRANT_URL_LARGE"), api_key=os.getenv("QDRANT_API_KEY_LARGE"))
+# Local alternative, kept for reference:
+# qdrant = QdrantClient(host="localhost", port=6333)

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ

requirements_backup.txt ADDED Viewed

	@@ -0,0 +1,78 @@

+accelerate==1.10.1
+annotated-types==0.7.0
+anyio==4.10.0
+bitsandbytes==0.42.0
+bitsandbytes-windows==0.37.5
+cachetools==5.5.2
+certifi==2025.8.3
+charset-normalizer==3.4.3
+click==8.2.1
+colorama==0.4.6
+distro==1.9.0
+dnspython==2.8.0
+fastapi==0.116.1
+filelock==3.19.1
+fsspec==2025.7.0
+google-ai-generativelanguage==0.6.15
+google-api-core==2.25.1
+google-api-python-client==2.181.0
+google-auth==2.40.3
+google-auth-httplib2==0.2.0
+google-generativeai==0.8.5
+googleapis-common-protos==1.70.0
+groq==0.31.1
+grpcio==1.74.0
+grpcio-status==1.71.2
+h11==0.16.0
+h2==4.3.0
+hpack==4.1.0
+httpcore==1.0.9
+httplib2==0.30.0
+httpx==0.28.1
+huggingface-hub==0.34.4
+hyperframe==6.1.0
+idna==3.10
+Jinja2==3.1.6
+joblib==1.5.2
+MarkupSafe==3.0.2
+mpmath==1.3.0
+networkx==3.4.2
+numpy==2.2.6
+packaging==25.0
+pillow==11.3.0
+portalocker==3.2.0
+proto-plus==1.26.1
+protobuf==5.29.5
+psutil==7.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pydantic==2.11.7
+pydantic_core==2.33.2
+pymongo==4.15.1
+pyparsing==3.2.3
+python-dotenv==1.1.1
+PyYAML==6.0.2
+qdrant-client==1.15.1
+regex==2025.7.34
+requests==2.32.5
+rsa==4.9.1
+safetensors==0.6.2
+scikit-learn==1.7.2
+scipy==1.16.2
+sentence-transformers==5.1.0
+setuptools==80.9.0
+sniffio==1.3.1
+starlette==0.47.3
+sympy==1.14.0
+threadpoolctl==3.6.0
+tokenizers==0.21.4
+torch==2.8.0
+tqdm==4.67.1
+transformers==4.55.4
+typing-inspection==0.4.1
+typing_extensions==4.15.0
+uritemplate==4.2.0
+urllib3==2.5.0
+uvicorn==0.35.0
+wheel==0.45.1

tempCodeRunnerFile.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ MiniLM

util.py CHANGED Viewed

@@ -3,18 +3,19 @@ import torch
 from qdrant_client import QdrantClient, models
 from sentence_transformers import SentenceTransformer, CrossEncoder
 from pymongo import MongoClient
-from bson import ObjectId
 from typing import List, Dict
 import google.generativeai as genai
 from groq import Groq
-from embedding_model_instance import embedding_model, embedding_dim, reranker
-from qdrant_instance import qdrant
 from llm import gemini, groq
 from mongo_instance import db
 import json
 from bson import ObjectId
 def build_content(doc: dict, entity_type: str) -> str:
     """Convert MongoDB document into natural text for embeddings."""
     parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
@@ -35,7 +36,7 @@ def build_content(doc: dict, entity_type: str) -> str:
 class ErrorBot:
     """Chatbot using RAG (Qdrant + Gemini API)."""
-    def __init__(self, embedding_model_name: str, llm_model_name: str, google_api_key: str = None, groq_api_key: str = None, llm_provider: str = "gemini", last_context: list = None):
         print("🚀 Initializing ErrorBot...")
         self.last_context = last_context
@@ -43,14 +44,22 @@ class ErrorBot:
         # --- Embedding model
         # self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.embedding_model = embedding_model
-        self.embedding_dim = embedding_dim
         self.db = db
         # --- Qdrant client
-        self.qdrant = qdrant
         self.collection_name = "technical_errors"
         #self._setup_collection()
         # --- LLM setup
@@ -73,88 +82,115 @@ class ErrorBot:
         self.reranker = reranker
         print(f"✅ ErrorBot ready with {self.llm_provider.upper()}")
-    def _setup_collection(self):
-        if not self.qdrant.collection_exists(self.collection_name):
-            self.qdrant.create_collection(
-                collection_name=self.collection_name,
-                vectors_config=models.VectorParams(
-                    size=self.embedding_dim,
-                    distance=models.Distance.COSINE,
-                ),
-            )
-    def ingest_from_mongodb(self, mongo_uri: str, db_name: str, batch_size: int = 32):
-        client = MongoClient(mongo_uri)
-        db = client[db_name]
-        collections = {
-            "ProblemReport": db["problemReports"],
-            "FaultAnalysis": db["faultanalysis"],
-            "Correction": db["corrections"],
-        }
-        docs = []
-        for entity_type, coll in collections.items():
-            for doc in coll.find():
-                if "_id" in doc and isinstance(doc["_id"], ObjectId):
-                    doc["_id"] = str(doc["_id"])
-                docs.append({"entity_type": entity_type, "data": doc})
-        contents = [build_content(d["data"], d["entity_type"]) for d in docs]
-        all_embeddings = []
-        for i in range(0, len(contents), batch_size):
-            batch_contents = contents[i:i + batch_size]
-            embeddings = self.embedding_model.encode(batch_contents, show_progress_bar=True).tolist()
-            all_embeddings.extend(embeddings)
-        self.qdrant.upsert(
             collection_name=self.collection_name,
-            points=[
-                models.PointStruct(
-                    id=i,
-                    vector=emb,
-                    payload={
-                        "id": d["data"].get("id", str(d["data"].get("_id", i))),
-                        "entity_type": d["entity_type"],
-                        "raw": d["data"],
-                        "content": c,
-                    },
-                )
-                for i, (d, emb, c) in enumerate(zip(docs, all_embeddings, contents))
-            ],
-            wait=True,
-        )
-        print(f"✅ Ingested {len(docs)} documents into '{self.collection_name}'")
-    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.3, rerank: bool = True):
-        query_embedding = self.embedding_model.encode(query).tolist()
-        hits = self.qdrant.query_points(
             collection_name=self.collection_name,
-            query=query_embedding,
-            limit=top_k * 3 if rerank else top_k,
             with_payload=True,
             score_threshold=score_threshold,
         ).points
-        candidates = [
-            {
-                "id": hit.payload.get("id"),
-                "entity_type": hit.payload.get("entity_type", ""),
-                "content": hit.payload.get("content", ""),
-                "score": hit.score,
-            }
-            for hit in hits
-        ]
-        if rerank and candidates:
-            pairs = [(query, c["content"]) for c in candidates]
             scores = self.reranker.predict(pairs)
-            for i, score in enumerate(scores):
-                candidates[i]["rerank_score"] = float(score)
-            candidates = sorted(candidates, key=lambda x: x["rerank_score"], reverse=True)
-        return candidates[:top_k]
     def generate_answer(self, query: str, context: List[Dict], history: list = None, is_followup: bool = False ):
         """
@@ -251,9 +287,9 @@ class ErrorBot:
         Your tasks are:
         1. If the question is about PR, FA, or CR → Identify which information is relevant and explain clearly in simple, actionable language (summarize, don’t just repeat).
-        2. If the question is about programming or algorithms → Provide a correct, clear, and well-structured code example in the requested language, with a short explanation.
         3. If the question is non-technical/general → Respond politely, clearly, and helpfully in a conversational style.
-        4. Always keep answers concise and easy to understand.
         ### User Question:
@@ -290,10 +326,13 @@ class ErrorBot:
             )
             return completion.choices[0].message.content.strip()
     def fetch_problem_report_with_links(self, pr_id: str):
         # --- Fetch Problem Report
         pr_doc = db["problemReports"].find_one({"id": pr_id})
         if not pr_doc:
             return None, [], [], [], []
@@ -326,6 +365,8 @@ class ErrorBot:
         for doc in fa_docs:
             if "_id" in doc and isinstance(doc["_id"], ObjectId):
                 doc["_id"] = str(doc["_id"])
         return pr_doc, cr_ids, fa_ids, cr_docs, fa_docs
@@ -423,7 +464,7 @@ class ErrorBot:
                 )
                 return completion.choices[0].message.content.strip(), []
-        elif is_followup and self.last_context:
             if not is_technical:
                 print("⚠️ Non-technical followup → skipping Qdrant.")
                 system_prompt = "You are a helpful assistant. Answer clearly and concisely."
@@ -546,7 +587,7 @@ class ErrorBot:
             print("💬 No relevant context found.")
             return "I could not find any relevant information.", []
-        print(f"✅ Using {len(retrieved_context)} documents as context.")
         #answer = self.generate_answer(query, retrieved_context, history, is_followup)
         answer = self.generate_answer(query, context_docs, history, is_followup)

 from qdrant_client import QdrantClient, models
 from sentence_transformers import SentenceTransformer, CrossEncoder
 from pymongo import MongoClient
 from typing import List, Dict
 import google.generativeai as genai
 from groq import Groq
+from embedding_model_instance import embedding_model_m3, embedding_dim_m3, embedding_model_large, embedding_dim_large, reranker
+from qdrant_instance import qdrant_m3, qdrant_large
 from llm import gemini, groq
 from mongo_instance import db
 import json
 from bson import ObjectId
 def build_content(doc: dict, entity_type: str) -> str:
     """Convert MongoDB document into natural text for embeddings."""
     parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
 class ErrorBot:
     """Chatbot using RAG (Qdrant + Gemini API)."""
+    def __init__(self, llm_model_name: str,  llm_provider: str = "gemini", last_context: list = None):
         print("🚀 Initializing ErrorBot...")
         self.last_context = last_context
         # --- Embedding model
         # self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.embedding_model_m3 = embedding_model_m3
+        self.embedding_dim_m3 = embedding_dim_m3
+        self.embedding_model_large = embedding_model_large
+        self.embedding_dim_large = embedding_dim_large
         self.db = db
         # --- Qdrant client
+        self.qdrant_m3 = qdrant_m3
+        self.qdrant_large = qdrant_large
         self.collection_name = "technical_errors"
+        #self.collection_name = "json_ingestion"
         #self._setup_collection()
         # --- LLM setup
         self.reranker = reranker
         print(f"✅ ErrorBot ready with {self.llm_provider.upper()}")
+    # def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.5, rerank: bool = True):
+    #     query_embedding = self.embedding_model.encode(query).tolist()
+    #     hits = self.qdrant.query_points(
+    #         collection_name=self.collection_name,
+    #         query=query_embedding,
+    #         #limit=top_k * 3 if rerank else top_k,
+    #         limit = 100,
+    #         with_payload=True,
+    #         score_threshold=score_threshold,
+    #         search_params=models.SearchParams(hnsw_ef=256),
+    #     ).points
+    #     candidates = [
+    #         {
+    #              "id": hit.payload.get("id"),
+    #             # "id": hit.payload.get("raw", {}).get("id"),
+    #             "entity_type": hit.payload.get("entity_type", ""),
+    #             "content": hit.payload.get("content", ""),
+    #             "score": hit.score,
+    #         }
+    #         for hit in hits
+    #     ]
+    #     if rerank and candidates:
+    #         pairs = [(query, c["content"]) for c in candidates]
+    #         scores = self.reranker.predict(pairs)
+    #         for i, score in enumerate(scores):
+    #             candidates[i]["rerank_score"] = float(score)
+    #         candidates = sorted(candidates, key=lambda x: x["rerank_score"], reverse=True)
+    #     return candidates[:5]
+    # ==================================================
+    # 🧮 Dual Qdrant Ensemble Retrieval
+    # ==================================================
+    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.5, rerank: bool = True):
+        """Retrieve context documents using the BGE-M3 + BGE-Large ensemble.
+
+        The query is embedded with each model and searched in that model's own
+        Qdrant cluster. Raw scores of all hits are min-max normalised over the
+        combined pool, hits sharing a payload "id" are merged by averaging
+        their normalised scores, and the best ``top_k * 2`` survivors are
+        optionally re-ordered by the cross-encoder.
+
+        Args:
+            query: User question to search for.
+            top_k: Maximum number of documents to return; each cluster is
+                asked for ``top_k * 3`` candidates.
+            score_threshold: Score threshold forwarded to both Qdrant queries.
+            rerank: When true, order the merged candidates by cross-encoder score.
+
+        Returns:
+            Up to ``top_k`` dicts with keys "id", "entity_type", "content",
+            "score", "source", "score_norm" and, when reranked, "rerank_score".
+            An empty list when neither cluster returns a hit.
+        """
+        print(f"\n🔍 Retrieving context using ensemble (M3 + BGE-Large) for query: {query}")
+        # 1️⃣ + 2️⃣ Encode with each model and query that model's cluster.
+        # Tagging hits while iterating per backend replaces a per-hit
+        # `hit in hits_m3` scan, which was quadratic and could mislabel a
+        # LARGE hit that compared equal to an M3 hit.
+        backends = (
+            ("M3", self.embedding_model_m3, self.qdrant_m3),
+            ("LARGE", self.embedding_model_large, self.qdrant_large),
+        )
+        all_hits = []
+        for source, encoder, client in backends:
+            points = client.query_points(
+                collection_name=self.collection_name,
+                query=encoder.encode(query).tolist(),
+                limit=top_k * 3,
+                with_payload=True,
+                score_threshold=score_threshold,
+            ).points
+            for hit in points:
+                payload = hit.payload or {}
+                all_hits.append({
+                    "id": payload.get("id"),
+                    "entity_type": payload.get("entity_type", ""),
+                    "content": payload.get("content", ""),
+                    "score": hit.score,
+                    "source": source,
+                })
+        if not all_hits:
+            print("⚠️ No hits from either model.")
+            return []
+        # 3️⃣ Normalize scores between 0-1 across the combined pool.
+        raw_scores = [h["score"] for h in all_hits]
+        min_s, max_s = min(raw_scores), max(raw_scores)
+        span = max_s - min_s + 1e-6  # epsilon avoids division by zero when all scores match
+        for h in all_hits:
+            h["score_norm"] = (h["score"] - min_s) / span
+        # Group by ID and average scores if duplicates exist.
+        merged = {}
+        for position, h in enumerate(all_hits):
+            # Hits without a payload id cannot be matched across clusters; give
+            # each its own key so they are not all collapsed into one entry.
+            key = h["id"] if h["id"] is not None else ("__no_id__", position)
+            if key not in merged:
+                merged[key] = h
+            else:
+                merged[key]["score_norm"] = (merged[key]["score_norm"] + h["score_norm"]) / 2
+        combined_hits = sorted(merged.values(), key=lambda x: x["score_norm"], reverse=True)[:top_k * 2]
+        # 4️⃣ (Optional) Rerank using cross encoder
+        if rerank and combined_hits:
+            pairs = [(query, h["content"]) for h in combined_hits]
+            rerank_scores = self.reranker.predict(pairs)
+            for h, s in zip(combined_hits, rerank_scores):
+                h["rerank_score"] = float(s)
+            combined_hits = sorted(combined_hits, key=lambda x: x["rerank_score"], reverse=True)
+        print(f"✅ Ensemble retrieved {len(combined_hits)} candidates.")
+        return combined_hits[:top_k]
     def generate_answer(self, query: str, context: List[Dict], history: list = None, is_followup: bool = False ):
         """
         Your tasks are:
         1. If the question is about PR, FA, or CR → Identify which information is relevant and explain clearly in simple, actionable language (summarize, don’t just repeat).
+        2. If the question is about programming or algorithms → Provide a correct, clear, and well-structured code example in the requested language, with explanation.
         3. If the question is non-technical/general → Respond politely, clearly, and helpfully in a conversational style.
+        4. Always keep answers and easy to understand and detailed.
         ### User Question:
             )
             return completion.choices[0].message.content.strip()
     def fetch_problem_report_with_links(self, pr_id: str):
         # --- Fetch Problem Report
         pr_doc = db["problemReports"].find_one({"id": pr_id})
+        #print("pr_id:", pr_id)
+        #print("pr_doc:", pr_doc)
         if not pr_doc:
             return None, [], [], [], []
         for doc in fa_docs:
             if "_id" in doc and isinstance(doc["_id"], ObjectId):
                 doc["_id"] = str(doc["_id"])
+        print(pr_doc)
         return pr_doc, cr_ids, fa_ids, cr_docs, fa_docs
                 )
                 return completion.choices[0].message.content.strip(), []
+        elif is_followup and self.last_context:
             if not is_technical:
                 print("⚠️ Non-technical followup → skipping Qdrant.")
                 system_prompt = "You are a helpful assistant. Answer clearly and concisely."
             print("💬 No relevant context found.")
             return "I could not find any relevant information.", []
+        #print(f"✅ Using {len(retrieved_context)} documents as context.")
         #answer = self.generate_answer(query, retrieved_context, history, is_followup)
         answer = self.generate_answer(query, context_docs, history, is_followup)