shreekantkalwar committed on
Commit
b3e9a96
·
1 Parent(s): e7d16b7
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM python:3.13
2
 
3
  WORKDIR /app
4
 
 
1
+ FROM python:3.14
2
 
3
  WORKDIR /app
4
 
app.py CHANGED
@@ -2,13 +2,17 @@
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
  from fastapi.middleware.cors import CORSMiddleware
5
- from bot_instance import gemini_bot, llama_bot # singleton ErrorBot instance
6
  from typing import List, Optional,Any
7
 
8
  import os
9
  from dotenv import load_dotenv
10
  from util import ErrorBot
11
 
 
 
 
 
12
 
13
  app = FastAPI(title="ErrorBot API")
14
 
@@ -53,41 +57,101 @@ def root():
53
  # return {"reply": answer}
54
 
55
  load_dotenv()
56
- GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
57
 
58
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
59
 
60
- EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
 
 
61
 
62
  @app.post("/gemini/chat")
63
  def gemini_chat(request: ChatRequest):
64
  history_list = [{"role": msg.role, "content": msg.content} for msg in request.history]
65
  gemini_bot = ErrorBot(
66
- embedding_model_name=EMBEDDING_MODEL,
67
  llm_model_name="gemini-2.5-flash",
68
- google_api_key=GOOGLE_API_KEY,
69
  llm_provider="gemini",
70
  last_context = request.lastContext
71
  )
72
- print("In App.py")
73
- print(request.lastContext)
74
  answer, last_context = gemini_bot.ask(request.message, history=history_list)
75
- print(answer)
76
- print(last_context)
77
  return {"reply": answer, "last_context": last_context}
78
 
79
  @app.post("/llama/chat")
80
  def llama_chat(request: ChatRequest):
81
  history_list = [{"role": msg.role, "content": msg.content} for msg in request.history]
82
  llama_bot = ErrorBot(
83
- embedding_model_name=EMBEDDING_MODEL,
84
  llm_model_name="llama-3.3-70b-versatile",
85
- groq_api_key=GROQ_API_KEY,
86
  llm_provider="groq",
87
  last_context = request.lastContext
88
 
89
  )
90
  answer, last_context = llama_bot.ask(request.message, history=history_list)
91
- print(answer)
92
- print(last_context)
93
- return {"reply": answer, "last_context": last_context}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
  from fastapi.middleware.cors import CORSMiddleware
5
+ #from bot_instance import gemini_bot, llama_bot # singleton ErrorBot instance
6
  from typing import List, Optional,Any
7
 
8
  import os
9
  from dotenv import load_dotenv
10
  from util import ErrorBot
11
 
12
+ # from mongo_to_qdrant_ingestor import MongoToQdrantIngestor
13
+ # from qdrant_instance import qdrant
14
+ # from embedding_model_instance import embedding_model
15
+ # from json_to_qdrant_ingestor import JsonToQdrantIngestor
16
 
17
  app = FastAPI(title="ErrorBot API")
18
 
 
57
  # return {"reply": answer}
58
 
59
  load_dotenv()
60
+ #GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
61
 
62
+ #GROQ_API_KEY = os.getenv("GROQ_API_KEY")
63
 
64
+ #EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
65
+ #EMBEDDING_MODEL = "all-MiniLM-L6-v2"
66
+ #EMBEDDING_MODEL = "BAAI/bge-m3"
67
 
68
def _run_chat(request, llm_model_name, llm_provider):
    """Answer one chat request with a freshly built ErrorBot.

    Shared implementation for the provider-specific chat endpoints, so the
    request handling stays identical for every LLM backend.

    Args:
        request: The incoming ChatRequest; its ``message``, ``history`` and
            ``lastContext`` attributes are read.
        llm_model_name: Model identifier forwarded to ErrorBot.
        llm_provider: Provider key forwarded to ErrorBot.

    Returns:
        A dict with the bot's ``reply`` and the ``last_context`` it returned,
        which the client is expected to send back on the next turn.
    """
    # A missing history is treated as an empty conversation instead of
    # failing while iterating over None.
    history_list = [
        {"role": msg.role, "content": msg.content}
        for msg in (request.history or [])
    ]
    bot = ErrorBot(
        llm_model_name=llm_model_name,
        llm_provider=llm_provider,
        last_context=request.lastContext,
    )
    answer, last_context = bot.ask(request.message, history=history_list)
    return {"reply": answer, "last_context": last_context}


@app.post("/gemini/chat")
def gemini_chat(request: ChatRequest):
    """Chat endpoint backed by the Gemini provider."""
    return _run_chat(request, "gemini-2.5-flash", "gemini")


@app.post("/llama/chat")
def llama_chat(request: ChatRequest):
    """Chat endpoint backed by the Groq-hosted Llama model."""
    return _run_chat(request, "llama-3.3-70b-versatile", "groq")
96
+
97
+ # @app.post("/ingest/mongodb")
98
+ # def ingest_mongodb():
99
+ # """
100
+ # Ingest documents from MongoDB into the bot's knowledge base.
101
+ # """
102
+
103
+ # ingestor = MongoToQdrantIngestor(qdrant, embedding_model, collection_name="technical_errors")
104
+
105
+
106
+ # # def build_content(doc, entity_type):
107
+ # # """Simple example function to build textual content."""
108
+ # # return f"{entity_type}: {doc.get('title', '')} {doc.get('description', '')}"
109
+
110
+ # def build_content(doc: dict, entity_type: str) -> str:
111
+ # """Convert MongoDB document into natural text for embeddings."""
112
+ # parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
113
+ # for k, v in doc.items():
114
+ # if k in ["_id"]: # skip ObjectId
115
+ # continue
116
+ # if isinstance(v, list):
117
+ # parts.append(f"{k}: {', '.join(map(str, v))}")
118
+ # elif isinstance(v, dict):
119
+ # nested = "; ".join([f"{nk}: {nv}" for nk, nv in v.items() if nv])
120
+ # parts.append(f"{k}: {nested}")
121
+ # else:
122
+ # if v:
123
+ # parts.append(f"{k}: {v}")
124
+ # return "\n".join(parts)
125
+
126
+ # ingestor.ingest_from_mongodb(
127
+
128
+ # build_content_fn=build_content,
129
+ # batch_size=500,
130
+ # )
131
+ # return {"status": "ingestion started"}
132
+
133
+
134
+ # @app.post("/ingest/json")
135
+ # def ingest_json():
136
+ # """
137
+ # Ingest documents from JSON into the bot's knowledge base.
138
+ # """
139
+
140
+ # json_sources = {
141
+ # "ProblemReport": "./json/problemReports.json",
142
+ # "Correction": "./json/corrections.json",
143
+ # "FaultAnalysis": "./json/faultanalysis.json",
144
+ # }
145
+ # ingestor = JsonToQdrantIngestor(qdrant, embedding_model, collection_name="technical_errors")
146
+
147
+
148
+ # def build_content(doc, entity_type):
149
+ # """Simple example function to build textual content."""
150
+ # return f"{entity_type}: {doc.get('title', '')} {doc.get('description', '')}"
151
+
152
+ # ingestor.ingest_from_json(
153
+ # json_sources,
154
+ # build_content_fn=build_content,
155
+ # batch_size=500,
156
+ # )
157
+ # return {"status": "ingestion started"}
embedding_model_instance.py CHANGED
@@ -5,11 +5,24 @@ from sentence_transformers import SentenceTransformer, CrossEncoder
5
  # --- Embedding model
6
 
7
 
8
- EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
 
 
 
 
 
 
 
 
9
 
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
  print(f"Using device: {device}")
12
- embedding_model = SentenceTransformer(EMBEDDING_MODEL, device=device)
13
- embedding_dim = embedding_model.get_sentence_embedding_dimension()
 
 
 
 
 
14
 
15
  reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
 
5
  # --- Embedding model
6
 
7
 
8
+
9
# Hugging Face model ids of the two embedding models loaded below.
EMBEDDING_MODEL_M3 = "BAAI/bge-m3"
EMBEDDING_MODEL_LARGE = "BAAI/bge-large-en-v1.5"

# Query CUDA availability once. The device-name / current-device diagnostics
# raise on hosts without CUDA, so they must only run when it is available;
# otherwise importing this module fails before the CPU fallback is reached.
_cuda_available = torch.cuda.is_available()
print("CUDA available:", _cuda_available)
if _cuda_available:
    print(torch.cuda.get_device_name(0))
    print("Current device:", torch.cuda.current_device())

device = "cuda" if _cuda_available else "cpu"
print(f"Using device: {device}")

# Both embedding models are loaded once at import time and shared by importers.
embedding_model_m3 = SentenceTransformer(EMBEDDING_MODEL_M3, device=device)
embedding_model_large = SentenceTransformer(EMBEDDING_MODEL_LARGE, device=device)

embedding_dim_m3 = embedding_model_m3.get_sentence_embedding_dimension()
embedding_dim_large = embedding_model_large.get_sentence_embedding_dimension()

# Cross-encoder used to rerank retrieved candidates.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
mongo_instance.py CHANGED
@@ -1,6 +1,14 @@
1
  from pymongo import MongoClient
 
 
 
 
2
 
3
  # Connect to MongoDB
4
- client = MongoClient("mongodb+srv://dhaval:Dhaval15@cluster0.rwu1ze6.mongodb.net/prontoDB?retryWrites=true&w=majority&appName=Cluster0") # replace with your URI
5
- db = client["prontoDB"]
 
 
 
 
6
 
 
1
  from pymongo import MongoClient
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
+ load_dotenv()
6
 
7
  # Connect to MongoDB
8
+
9
# Connection settings come from the environment (populated by load_dotenv()).
mongo_uri = os.getenv("MONGO_URI")
mongo_db_name = os.getenv("MONGO_DB_NAME")

# Fail fast with a clear message: without a URI pymongo falls back to its
# default host, and indexing the client with None raises an opaque TypeError.
if not mongo_uri:
    raise RuntimeError("MONGO_URI environment variable is not set")
if not mongo_db_name:
    raise RuntimeError("MONGO_DB_NAME environment variable is not set")

client = MongoClient(mongo_uri)
db = client[mongo_db_name]

# NOTE: no server round-trip is made here, so this message does not by itself
# prove the server is reachable.
print("MongoDB Connected")
14
 
qdrant_instance.py CHANGED
@@ -7,7 +7,14 @@ load_dotenv()
7
 
8
 
9
  print("Connecting to Qdrant...")
10
- qdrant = QdrantClient(
11
- url=os.getenv("QDRANT_URL"),
12
- api_key=os.getenv("QDRANT_API_KEY"),
13
- )
 
 
 
 
 
 
 
 
7
 
8
 
9
  print("Connecting to Qdrant...")
10
def _connect_qdrant(url_var, api_key_var):
    """Build a QdrantClient from the two named environment variables."""
    endpoint = os.getenv(url_var)
    secret = os.getenv(api_key_var)
    return QdrantClient(url=endpoint, api_key=secret)


# One client per cluster, configured by its own URL / API-key variable pair.
qdrant_m3 = _connect_qdrant("QDRANT_URL_M3", "QDRANT_API_KEY_M3")

qdrant_large = _connect_qdrant("QDRANT_URL_LARGE", "QDRANT_API_KEY_LARGE")
19
+
20
+ # qdrant = QdrantClient(host="localhost", port=6333)
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
requirements_backup.txt ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.10.1
2
+ annotated-types==0.7.0
3
+ anyio==4.10.0
4
+ bitsandbytes==0.42.0
5
+ bitsandbytes-windows==0.37.5
6
+ cachetools==5.5.2
7
+ certifi==2025.8.3
8
+ charset-normalizer==3.4.3
9
+ click==8.2.1
10
+ colorama==0.4.6
11
+ distro==1.9.0
12
+ dnspython==2.8.0
13
+ fastapi==0.116.1
14
+ filelock==3.19.1
15
+ fsspec==2025.7.0
16
+ google-ai-generativelanguage==0.6.15
17
+ google-api-core==2.25.1
18
+ google-api-python-client==2.181.0
19
+ google-auth==2.40.3
20
+ google-auth-httplib2==0.2.0
21
+ google-generativeai==0.8.5
22
+ googleapis-common-protos==1.70.0
23
+ groq==0.31.1
24
+ grpcio==1.74.0
25
+ grpcio-status==1.71.2
26
+ h11==0.16.0
27
+ h2==4.3.0
28
+ hpack==4.1.0
29
+ httpcore==1.0.9
30
+ httplib2==0.30.0
31
+ httpx==0.28.1
32
+ huggingface-hub==0.34.4
33
+ hyperframe==6.1.0
34
+ idna==3.10
35
+ Jinja2==3.1.6
36
+ joblib==1.5.2
37
+ MarkupSafe==3.0.2
38
+ mpmath==1.3.0
39
+ networkx==3.4.2
40
+ numpy==2.2.6
41
+ packaging==25.0
42
+ pillow==11.3.0
43
+ portalocker==3.2.0
44
+ proto-plus==1.26.1
45
+ protobuf==5.29.5
46
+ psutil==7.0.0
47
+ pyasn1==0.6.1
48
+ pyasn1_modules==0.4.2
49
+ pydantic==2.11.7
50
+ pydantic_core==2.33.2
51
+ pymongo==4.15.1
52
+ pyparsing==3.2.3
53
+ python-dotenv==1.1.1
54
+
55
+ PyYAML==6.0.2
56
+ qdrant-client==1.15.1
57
+ regex==2025.7.34
58
+ requests==2.32.5
59
+ rsa==4.9.1
60
+ safetensors==0.6.2
61
+ scikit-learn==1.7.2
62
+ scipy==1.16.2
63
+ sentence-transformers==5.1.0
64
+ setuptools==80.9.0
65
+ sniffio==1.3.1
66
+ starlette==0.47.3
67
+ sympy==1.14.0
68
+ threadpoolctl==3.6.0
69
+ tokenizers==0.21.4
70
+ torch==2.8.0
71
+ tqdm==4.67.1
72
+ transformers==4.55.4
73
+ typing-inspection==0.4.1
74
+ typing_extensions==4.15.0
75
+ uritemplate==4.2.0
76
+ urllib3==2.5.0
77
+ uvicorn==0.35.0
78
+ wheel==0.45.1
tempCodeRunnerFile.py ADDED
@@ -0,0 +1 @@
 
 
1
+ MiniLM
util.py CHANGED
@@ -3,18 +3,19 @@ import torch
3
  from qdrant_client import QdrantClient, models
4
  from sentence_transformers import SentenceTransformer, CrossEncoder
5
  from pymongo import MongoClient
6
- from bson import ObjectId
7
  from typing import List, Dict
8
  import google.generativeai as genai
9
  from groq import Groq
10
 
11
- from embedding_model_instance import embedding_model, embedding_dim, reranker
12
- from qdrant_instance import qdrant
13
  from llm import gemini, groq
14
  from mongo_instance import db
15
  import json
16
  from bson import ObjectId
17
 
 
18
  def build_content(doc: dict, entity_type: str) -> str:
19
  """Convert MongoDB document into natural text for embeddings."""
20
  parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
@@ -35,7 +36,7 @@ def build_content(doc: dict, entity_type: str) -> str:
35
  class ErrorBot:
36
  """Chatbot using RAG (Qdrant + Gemini API)."""
37
 
38
- def __init__(self, embedding_model_name: str, llm_model_name: str, google_api_key: str = None, groq_api_key: str = None, llm_provider: str = "gemini", last_context: list = None):
39
  print("🚀 Initializing ErrorBot...")
40
  self.last_context = last_context
41
 
@@ -43,14 +44,22 @@ class ErrorBot:
43
  # --- Embedding model
44
  # self.device = "cuda" if torch.cuda.is_available() else "cpu"
45
 
46
- self.embedding_model = embedding_model
47
- self.embedding_dim = embedding_dim
 
 
 
 
 
48
 
49
  self.db = db
50
  # --- Qdrant client
51
 
52
- self.qdrant = qdrant
 
53
  self.collection_name = "technical_errors"
 
 
54
  #self._setup_collection()
55
 
56
  # --- LLM setup
@@ -73,88 +82,115 @@ class ErrorBot:
73
  self.reranker = reranker
74
  print(f"✅ ErrorBot ready with {self.llm_provider.upper()}")
75
 
76
- def _setup_collection(self):
77
- if not self.qdrant.collection_exists(self.collection_name):
78
- self.qdrant.create_collection(
79
- collection_name=self.collection_name,
80
- vectors_config=models.VectorParams(
81
- size=self.embedding_dim,
82
- distance=models.Distance.COSINE,
83
- ),
84
- )
85
-
86
- def ingest_from_mongodb(self, mongo_uri: str, db_name: str, batch_size: int = 32):
87
- client = MongoClient(mongo_uri)
88
- db = client[db_name]
89
-
90
- collections = {
91
- "ProblemReport": db["problemReports"],
92
- "FaultAnalysis": db["faultanalysis"],
93
- "Correction": db["corrections"],
94
- }
95
-
96
- docs = []
97
- for entity_type, coll in collections.items():
98
- for doc in coll.find():
99
- if "_id" in doc and isinstance(doc["_id"], ObjectId):
100
- doc["_id"] = str(doc["_id"])
101
- docs.append({"entity_type": entity_type, "data": doc})
102
-
103
- contents = [build_content(d["data"], d["entity_type"]) for d in docs]
104
 
105
- all_embeddings = []
106
- for i in range(0, len(contents), batch_size):
107
- batch_contents = contents[i:i + batch_size]
108
- embeddings = self.embedding_model.encode(batch_contents, show_progress_bar=True).tolist()
109
- all_embeddings.extend(embeddings)
110
 
111
- self.qdrant.upsert(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  collection_name=self.collection_name,
113
- points=[
114
- models.PointStruct(
115
- id=i,
116
- vector=emb,
117
- payload={
118
- "id": d["data"].get("id", str(d["data"].get("_id", i))),
119
- "entity_type": d["entity_type"],
120
- "raw": d["data"],
121
- "content": c,
122
- },
123
- )
124
- for i, (d, emb, c) in enumerate(zip(docs, all_embeddings, contents))
125
- ],
126
- wait=True,
127
- )
128
- print(f"✅ Ingested {len(docs)} documents into '{self.collection_name}'")
129
-
130
- def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.3, rerank: bool = True):
131
- query_embedding = self.embedding_model.encode(query).tolist()
132
- hits = self.qdrant.query_points(
133
  collection_name=self.collection_name,
134
- query=query_embedding,
135
- limit=top_k * 3 if rerank else top_k,
136
  with_payload=True,
137
  score_threshold=score_threshold,
138
  ).points
139
 
140
- candidates = [
141
- {
142
- "id": hit.payload.get("id"),
143
- "entity_type": hit.payload.get("entity_type", ""),
144
- "content": hit.payload.get("content", ""),
145
- "score": hit.score,
146
- }
147
- for hit in hits
148
- ]
149
-
150
- if rerank and candidates:
151
- pairs = [(query, c["content"]) for c in candidates]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  scores = self.reranker.predict(pairs)
153
- for i, score in enumerate(scores):
154
- candidates[i]["rerank_score"] = float(score)
155
- candidates = sorted(candidates, key=lambda x: x["rerank_score"], reverse=True)
156
 
157
- return candidates[:top_k]
 
158
 
159
  def generate_answer(self, query: str, context: List[Dict], history: list = None, is_followup: bool = False ):
160
  """
@@ -251,9 +287,9 @@ class ErrorBot:
251
 
252
  Your tasks are:
253
  1. If the question is about PR, FA, or CR → Identify which information is relevant and explain clearly in simple, actionable language (summarize, don’t just repeat).
254
- 2. If the question is about programming or algorithms → Provide a correct, clear, and well-structured code example in the requested language, with a short explanation.
255
  3. If the question is non-technical/general → Respond politely, clearly, and helpfully in a conversational style.
256
- 4. Always keep answers concise and easy to understand.
257
 
258
  ### User Question:
259
 
@@ -290,10 +326,13 @@ class ErrorBot:
290
  )
291
  return completion.choices[0].message.content.strip()
292
 
 
293
  def fetch_problem_report_with_links(self, pr_id: str):
294
 
295
  # --- Fetch Problem Report
296
  pr_doc = db["problemReports"].find_one({"id": pr_id})
 
 
297
  if not pr_doc:
298
  return None, [], [], [], []
299
 
@@ -326,6 +365,8 @@ class ErrorBot:
326
  for doc in fa_docs:
327
  if "_id" in doc and isinstance(doc["_id"], ObjectId):
328
  doc["_id"] = str(doc["_id"])
 
 
329
 
330
  return pr_doc, cr_ids, fa_ids, cr_docs, fa_docs
331
 
@@ -423,7 +464,7 @@ class ErrorBot:
423
  )
424
  return completion.choices[0].message.content.strip(), []
425
 
426
- elif is_followup and self.last_context:
427
  if not is_technical:
428
  print("⚠️ Non-technical followup → skipping Qdrant.")
429
  system_prompt = "You are a helpful assistant. Answer clearly and concisely."
@@ -546,7 +587,7 @@ class ErrorBot:
546
  print("💬 No relevant context found.")
547
  return "I could not find any relevant information.", []
548
 
549
- print(f"✅ Using {len(retrieved_context)} documents as context.")
550
  #answer = self.generate_answer(query, retrieved_context, history, is_followup)
551
 
552
  answer = self.generate_answer(query, context_docs, history, is_followup)
 
3
  from qdrant_client import QdrantClient, models
4
  from sentence_transformers import SentenceTransformer, CrossEncoder
5
  from pymongo import MongoClient
6
+
7
  from typing import List, Dict
8
  import google.generativeai as genai
9
  from groq import Groq
10
 
11
+ from embedding_model_instance import embedding_model_m3, embedding_dim_m3, embedding_model_large, embedding_dim_large, reranker
12
+ from qdrant_instance import qdrant_m3, qdrant_large
13
  from llm import gemini, groq
14
  from mongo_instance import db
15
  import json
16
  from bson import ObjectId
17
 
18
+
19
  def build_content(doc: dict, entity_type: str) -> str:
20
  """Convert MongoDB document into natural text for embeddings."""
21
  parts = [f"{entity_type} ID: {doc.get('id', str(doc.get('_id', '')))}"]
 
36
  class ErrorBot:
37
  """Chatbot using RAG (Qdrant + Gemini API)."""
38
 
39
+ def __init__(self, llm_model_name: str, llm_provider: str = "gemini", last_context: list = None):
40
  print("🚀 Initializing ErrorBot...")
41
  self.last_context = last_context
42
 
 
44
  # --- Embedding model
45
  # self.device = "cuda" if torch.cuda.is_available() else "cpu"
46
 
47
+ self.embedding_model_m3 = embedding_model_m3
48
+ self.embedding_dim_m3 = embedding_dim_m3
49
+
50
+ self.embedding_model_large = embedding_model_large
51
+ self.embedding_dim_large = embedding_dim_large
52
+
53
+
54
 
55
  self.db = db
56
  # --- Qdrant client
57
 
58
+ self.qdrant_m3 = qdrant_m3
59
+ self.qdrant_large = qdrant_large
60
  self.collection_name = "technical_errors"
61
+
62
+ #self.collection_name = "json_ingestion"
63
  #self._setup_collection()
64
 
65
  # --- LLM setup
 
82
  self.reranker = reranker
83
  print(f"✅ ErrorBot ready with {self.llm_provider.upper()}")
84
 
85
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
 
 
 
 
 
87
 
88
+ # def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.5, rerank: bool = True):
89
+ # query_embedding = self.embedding_model.encode(query).tolist()
90
+ # hits = self.qdrant.query_points(
91
+ # collection_name=self.collection_name,
92
+ # query=query_embedding,
93
+ # #limit=top_k * 3 if rerank else top_k,
94
+ # limit = 100,
95
+ # with_payload=True,
96
+ # score_threshold=score_threshold,
97
+ # search_params=models.SearchParams(hnsw_ef=256),
98
+ # ).points
99
+
100
+ # candidates = [
101
+ # {
102
+ # "id": hit.payload.get("id"),
103
+ # # "id": hit.payload.get("raw", {}).get("id"),
104
+
105
+ # "entity_type": hit.payload.get("entity_type", ""),
106
+ # "content": hit.payload.get("content", ""),
107
+ # "score": hit.score,
108
+ # }
109
+ # for hit in hits
110
+ # ]
111
+
112
+ # if rerank and candidates:
113
+ # pairs = [(query, c["content"]) for c in candidates]
114
+ # scores = self.reranker.predict(pairs)
115
+ # for i, score in enumerate(scores):
116
+ # candidates[i]["rerank_score"] = float(score)
117
+ # candidates = sorted(candidates, key=lambda x: x["rerank_score"], reverse=True)
118
+
119
+ # return candidates[:5]
120
+
121
+ # ==================================================
122
+ # 🧮 Dual Qdrant Ensemble Retrieval
123
+ # ==================================================
124
def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.5, rerank: bool = True):
    """Retrieve context documents by querying both Qdrant clients and fusing the hits.

    The query is embedded with each embedding model and sent to the matching
    Qdrant client. Hits are min-max normalised over the combined result set,
    hits sharing the same payload ``id`` are merged by averaging their
    normalised scores, and the best ``top_k * 2`` are optionally reranked
    with the cross-encoder.

    Args:
        query: The user query text.
        top_k: Maximum number of documents to return.
        score_threshold: Minimum similarity score passed to Qdrant.
        rerank: When true, reorder the fused candidates by cross-encoder score.

    Returns:
        Up to ``top_k`` dicts with keys ``id``, ``entity_type``, ``content``,
        ``score``, ``source`` and ``score_norm`` (plus ``rerank_score`` when
        reranking ran). Empty list when neither client returned a hit.
    """
    print(f"\n🔍 Retrieving context using ensemble (M3 + BGE-Large) for query: {query}")

    # 1) Encode the query with both models.
    emb_m3 = self.embedding_model_m3.encode(query).tolist()
    emb_large = self.embedding_model_large.encode(query).tolist()

    # 2) Query both Qdrant clients with the matching embedding.
    hits_m3 = self.qdrant_m3.query_points(
        collection_name=self.collection_name,
        query=emb_m3,
        limit=top_k * 3,
        with_payload=True,
        score_threshold=score_threshold,
    ).points

    hits_large = self.qdrant_large.query_points(
        collection_name=self.collection_name,
        query=emb_large,
        limit=top_k * 3,
        with_payload=True,
        score_threshold=score_threshold,
    ).points

    # 3) Flatten the hits. The source label comes from the list being
    # iterated rather than from an equality-based membership test, which
    # could mislabel a hit and scanned the whole list for every hit.
    all_hits = []
    for source, hits in (("M3", hits_m3), ("LARGE", hits_large)):
        for hit in hits:
            payload = hit.payload or {}  # tolerate points stored without a payload
            all_hits.append({
                "id": payload.get("id"),
                "entity_type": payload.get("entity_type", ""),
                "content": payload.get("content", ""),
                "score": hit.score,
                "source": source,
            })

    if not all_hits:
        print("⚠️ No hits from either model.")
        return []

    # Min-max normalise the raw scores; the epsilon avoids division by zero
    # when every score is identical.
    scores = [h["score"] for h in all_hits]
    min_s, max_s = min(scores), max(scores)
    for h in all_hits:
        h["score_norm"] = (h["score"] - min_s) / (max_s - min_s + 1e-6)

    # Merge hits that share a payload id by averaging their normalised scores.
    # Hits without an id get a unique key so unrelated documents are never
    # collapsed into a single entry.
    merged = {}
    for index, h in enumerate(all_hits):
        key = h["id"] if h["id"] is not None else ("__no_id__", index)
        if key not in merged:
            merged[key] = h
        else:
            merged[key]["score_norm"] = (merged[key]["score_norm"] + h["score_norm"]) / 2

    combined_hits = sorted(merged.values(), key=lambda x: x["score_norm"], reverse=True)[:top_k * 2]

    # 4) Optionally rerank the fused candidates with the cross-encoder.
    if rerank and combined_hits:
        pairs = [(query, h["content"]) for h in combined_hits]
        for h, s in zip(combined_hits, self.reranker.predict(pairs)):
            h["rerank_score"] = float(s)
        combined_hits = sorted(combined_hits, key=lambda x: x["rerank_score"], reverse=True)

    print(f"✅ Ensemble retrieved {len(combined_hits)} candidates.")
    return combined_hits[:top_k]
194
 
195
  def generate_answer(self, query: str, context: List[Dict], history: list = None, is_followup: bool = False ):
196
  """
 
287
 
288
  Your tasks are:
289
  1. If the question is about PR, FA, or CR → Identify which information is relevant and explain clearly in simple, actionable language (summarize, don’t just repeat).
290
+ 2. If the question is about programming or algorithms → Provide a correct, clear, and well-structured code example in the requested language, with explanation.
291
  3. If the question is non-technical/general → Respond politely, clearly, and helpfully in a conversational style.
292
+ 4. Always keep answers and easy to understand and detailed.
293
 
294
  ### User Question:
295
 
 
326
  )
327
  return completion.choices[0].message.content.strip()
328
 
329
+
330
  def fetch_problem_report_with_links(self, pr_id: str):
331
 
332
  # --- Fetch Problem Report
333
  pr_doc = db["problemReports"].find_one({"id": pr_id})
334
+ #print("pr_id:", pr_id)
335
+ #print("pr_doc:", pr_doc)
336
  if not pr_doc:
337
  return None, [], [], [], []
338
 
 
365
  for doc in fa_docs:
366
  if "_id" in doc and isinstance(doc["_id"], ObjectId):
367
  doc["_id"] = str(doc["_id"])
368
+
369
+ print(pr_doc)
370
 
371
  return pr_doc, cr_ids, fa_ids, cr_docs, fa_docs
372
 
 
464
  )
465
  return completion.choices[0].message.content.strip(), []
466
 
467
+ elif is_followup and self.last_context:
468
  if not is_technical:
469
  print("⚠️ Non-technical followup → skipping Qdrant.")
470
  system_prompt = "You are a helpful assistant. Answer clearly and concisely."
 
587
  print("💬 No relevant context found.")
588
  return "I could not find any relevant information.", []
589
 
590
+ #print(f"✅ Using {len(retrieved_context)} documents as context.")
591
  #answer = self.generate_answer(query, retrieved_context, history, is_followup)
592
 
593
  answer = self.generate_answer(query, context_docs, history, is_followup)