Pulastya0 commited on
Commit
1dc1a72
·
verified ·
1 Parent(s): 3e0cda0

Update agent_langchain.py

Browse files
Files changed (1) hide show
  1. agent_langchain.py +53 -33
agent_langchain.py CHANGED
@@ -88,64 +88,84 @@ def call_routing(text, retries=3, delay=5):
88
  return "General IT"
89
 
90
# ==============================================================
# 📚 KNOWLEDGE BASE SETUP
# ==============================================================
# Persistent Chroma client (new API) — created eagerly at import time.
CHROMA_PATH = "/data/chroma"        # on-disk location of the Chroma store
COLLECTION_NAME = "knowledge_base"  # single shared collection name

try:
    chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
    kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
except Exception as e:
    # Leave the collection unset; query_kb() checks for this and reports
    # "Knowledge Base not initialized" instead of crashing at import.
    kb_collection = None
    print("⚠️ Could not initialize KB:", e)

# Sentence Transformer Embedding Function — shared by both the indexing
# and the query path so stored and query vectors live in the same space.
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between *vec1* and *vec2*.

    A zero vector has no direction, so either argument being all-zero
    yields 0.0 rather than a division-by-zero.
    """
    norm_a = np.linalg.norm(vec1)
    norm_b = np.linalg.norm(vec2)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return np.dot(vec1, vec2) / (norm_a * norm_b)
114
 
115
def query_kb(text: str, top_k: int = 1):
    """
    Query the knowledge base for relevant solutions using Chroma.

    Args:
        text: free-text query; embedded with embedding_func and matched
            against stored documents.
        top_k: number of nearest neighbours requested from Chroma.

    Returns: {"answer": str, "confidence": float, "metadata": dict}
    """
    if not kb_collection:
        return {"answer": "⚠️ Knowledge Base not initialized.", "confidence": 0.0, "metadata": {}}

    try:
        # Embed the query
        query_vector = embedding_func([text])[0]

        # Query Chroma
        results = kb_collection.query(query_embeddings=[query_vector], n_results=top_k)
        if not results or not results.get("documents") or len(results["documents"][0]) == 0:
            return {"answer": "No relevant KB entry found.", "confidence": 0.0, "metadata": {}}

        # Extract best match
        best_doc = results["documents"][0][0]
        best_meta = results["metadatas"][0][0]

        # BUG FIX: Chroma's default `include` does not return embeddings,
        # yet the result dict still carries an "embeddings" key whose value
        # is None. The old `"embeddings" in results` test therefore passed
        # and then crashed indexing None, and the broad except turned every
        # successful lookup into "Error accessing KB.". Use the returned
        # vectors only when they are actually present.
        returned = results.get("embeddings")
        if returned is not None and len(returned) and returned[0] is not None and len(returned[0]):
            best_vec = np.array(returned[0][0])
        else:
            # Fallback mirrors the original intent: compare the query with
            # itself, which degenerates to confidence ≈ 1.0.
            best_vec = np.asarray(query_vector)

        # Compute similarity confidence manually
        confidence = float(cosine_similarity(np.asarray(query_vector), best_vec))

        return {
            "answer": best_doc,
            "confidence": round(confidence, 3),
            "metadata": best_meta
        }

    except Exception as e:
        print(f"⚠️ KB query failed: {e}")
        return {"answer": "Error accessing KB.", "confidence": 0.0, "metadata": {}}
 
149
 
150
  # ==============================================================
151
  # 🧠 GEMINI LLM (Official LangChain Integration)
 
88
  return "General IT"
89
 
90
  # ==============================================================
91
+ # 📚 KNOWLEDGE BASE SETUP (PATCHED)
92
  # ==============================================================
93
+ import threading
 
 
94
 
95
+ CHROMA_PATH = "/data/chroma" # Persistent path in Space
96
+ COLLECTION_NAME = "knowledge_base"
 
 
 
 
97
 
98
+ kb_collection = None # Global placeholder, initialized via /setup
99
  embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
100
  model_name="sentence-transformers/all-MiniLM-L6-v2"
101
  )
102
+ kb_lock = threading.Lock() # Ensure thread-safe updates
103
+
104
def initialize_kb(json_texts: list, metadatas: list):
    """
    Initialize or populate the knowledge base with embeddings.

    Called from the /setup endpoint. Creates the persistent Chroma client
    on demand (not at import time), falling back to an in-memory client if
    the persistent path is unavailable; clears any previous entries, then
    embeds and stores the supplied documents.

    Args:
        json_texts: document strings to embed and store.
        metadatas: one metadata dict per document, parallel to json_texts.
    """
    global kb_collection
    with kb_lock:  # serialize concurrent /setup calls against the shared collection
        try:
            # Create persistent client (only now)
            chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
            kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
        except Exception as e:
            print("⚠️ Could not initialize persistent KB, using in-memory fallback:", e)
            chroma_client = chromadb.Client()  # fallback in-memory
            kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)

        # Clear existing entries safely
        try:
            if kb_collection.count() > 0:
                # Use a safe delete by fetching all ids first
                ids_to_delete = kb_collection.get()['ids']
                kb_collection.delete(ids=ids_to_delete)
        except Exception as e:
            print("⚠️ Could not clear KB:", e)

        # FIX: an empty payload previously fell through to embedding_func([])
        # and kb_collection.add(ids=[]), which raises inside the try and logs
        # a spurious "Failed to populate" for a valid no-op setup.
        if not json_texts:
            print("✅ KB initialized with 0 entries")
            return

        # Generate embeddings and store the documents alongside them.
        try:
            vectors = embedding_func(json_texts)
            kb_collection.add(
                ids=[str(i) for i in range(len(json_texts))],
                embeddings=vectors,
                metadatas=metadatas,
                documents=json_texts
            )
            print(f"✅ KB initialized with {len(json_texts)} entries")
        except Exception as e:
            print("⚠️ Failed to populate KB:", e)
141
 
 
 
 
 
 
142
 
143
def query_kb(text: str, top_k: int = 1):
    """
    Query the knowledge base for relevant solutions.

    Args:
        text: free-text query; embedded with embedding_func and matched
            against stored documents.
        top_k: number of nearest neighbours requested from Chroma.

    Returns {"answer": str, "confidence": float, "metadata": dict}
    """
    global kb_collection
    if not kb_collection:
        return {"answer": "⚠️ KB not initialized. Please call /setup first.", "confidence": 0.0, "metadata": {}}

    try:
        query_vector = embedding_func([text])[0]
        results = kb_collection.query(query_embeddings=[query_vector], n_results=top_k)
        if not results or not results.get("documents") or len(results["documents"][0]) == 0:
            return {"answer": "No relevant KB entry found.", "confidence": 0.0, "metadata": {}}

        best_doc = results["documents"][0][0]
        best_meta = results["metadatas"][0][0]

        # BUG FIX: Chroma's default `include` omits embeddings but the result
        # dict still has an "embeddings" key whose value is None, so the old
        # `"embeddings" in results` check passed and then crashed on None[0];
        # the broad except then returned "Error querying KB." for every hit.
        # Only use the returned vectors when they are actually present.
        returned = results.get("embeddings")
        if returned is not None and len(returned) and returned[0] is not None and len(returned[0]):
            best_vec = np.asarray(returned[0][0])
        else:
            # Fallback mirrors the original behavior: self-similarity, conf ≈ 1.0.
            best_vec = np.asarray(query_vector)

        query_arr = np.asarray(query_vector)
        # Epsilon in the denominator guards against a zero-norm vector.
        confidence = float(np.dot(query_arr, best_vec) / (np.linalg.norm(query_arr) * np.linalg.norm(best_vec) + 1e-8))

        return {"answer": best_doc, "confidence": round(confidence, 3), "metadata": best_meta}
    except Exception as e:
        print("⚠️ KB query failed:", e)
        return {"answer": "Error querying KB.", "confidence": 0.0, "metadata": {}}
168
+
169
 
170
  # ==============================================================
171
  # 🧠 GEMINI LLM (Official LangChain Integration)