Spaces:
Sleeping
Sleeping
Update agent_langchain.py
Browse files- agent_langchain.py +53 -33
agent_langchain.py
CHANGED
|
@@ -88,64 +88,84 @@ def call_routing(text, retries=3, delay=5):
|
|
| 88 |
return "General IT"
|
| 89 |
|
| 90 |
# ==============================================================
|
| 91 |
-
# 📚 KNOWLEDGE BASE SETUP
|
| 92 |
# ==============================================================
|
| 93 |
-
|
| 94 |
-
CHROMA_PATH = "/data/chroma"
|
| 95 |
-
COLLECTION_NAME = "knowledge_base"
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
|
| 100 |
-
except Exception as e:
|
| 101 |
-
kb_collection = None
|
| 102 |
-
print("⚠️ Could not initialize KB:", e)
|
| 103 |
|
| 104 |
-
#
|
| 105 |
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
|
| 106 |
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
| 107 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of *vec1* and *vec2*.

    A zero vector has no direction, so if either input has zero norm the
    similarity is defined as 0.0 rather than dividing by zero.
    """
    norm1 = np.linalg.norm(vec1)
    norm2 = np.linalg.norm(vec2)
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return np.dot(vec1, vec2) / (norm1 * norm2)
|
| 114 |
|
| 115 |
def query_kb(text: str, top_k: int = 1):
|
| 116 |
"""
|
| 117 |
-
Query the knowledge base for relevant solutions
|
| 118 |
-
Returns
|
| 119 |
"""
|
|
|
|
| 120 |
if not kb_collection:
|
| 121 |
-
return {"answer": "⚠️
|
| 122 |
|
| 123 |
try:
|
| 124 |
-
# Embed the query
|
| 125 |
query_vector = embedding_func([text])[0]
|
| 126 |
-
|
| 127 |
-
# Query Chroma
|
| 128 |
results = kb_collection.query(query_embeddings=[query_vector], n_results=top_k)
|
| 129 |
if not results or not results.get("documents") or len(results["documents"][0]) == 0:
|
| 130 |
return {"answer": "No relevant KB entry found.", "confidence": 0.0, "metadata": {}}
|
| 131 |
|
| 132 |
-
# Extract best match
|
| 133 |
best_doc = results["documents"][0][0]
|
| 134 |
best_meta = results["metadatas"][0][0]
|
| 135 |
best_vec = np.array(results["embeddings"][0][0]) if "embeddings" in results else query_vector
|
| 136 |
|
| 137 |
-
|
| 138 |
-
confidence = float(cosine_similarity(query_vector, best_vec))
|
| 139 |
-
|
| 140 |
-
return {
|
| 141 |
-
"answer": best_doc,
|
| 142 |
-
"confidence": round(confidence, 3),
|
| 143 |
-
"metadata": best_meta
|
| 144 |
-
}
|
| 145 |
|
|
|
|
| 146 |
except Exception as e:
|
| 147 |
-
print(
|
| 148 |
-
return {"answer": "Error
|
|
|
|
| 149 |
|
| 150 |
# ==============================================================
|
| 151 |
# 🧠 GEMINI LLM (Official LangChain Integration)
|
|
|
|
| 88 |
return "General IT"
|
| 89 |
|
| 90 |
# ==============================================================
|
| 91 |
+
# 📚 KNOWLEDGE BASE SETUP (PATCHED)
|
| 92 |
# ==============================================================
|
| 93 |
+
import threading

# Where the persistent Chroma store lives inside the Space, and the name of
# the collection that holds the knowledge base.
CHROMA_PATH = "/data/chroma"
COLLECTION_NAME = "knowledge_base"

# The collection is created lazily by the /setup endpoint; until that runs,
# this stays None and query_kb() reports the KB as uninitialized.
kb_collection = None

# Shared sentence-transformer embedding function used for both indexing and
# querying, so stored vectors and query vectors live in the same space.
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Serializes (re)initialization of kb_collection across request threads.
kb_lock = threading.Lock()
|
| 103 |
+
|
| 104 |
+
def initialize_kb(json_texts: list, metadatas: list):
    """
    Initialize (or rebuild) the knowledge base from the given documents.

    Called from the /setup endpoint. Creates a persistent Chroma client at
    CHROMA_PATH (falling back to an in-memory client on failure), clears any
    existing entries, then embeds ``json_texts`` and stores them together
    with their ``metadatas``.

    Parameters
    ----------
    json_texts : list
        Document strings to embed and store.
    metadatas : list
        One metadata dict per document; must be the same length as
        ``json_texts``.

    Returns None; failures are reported via print (best-effort, matching the
    module's existing error-handling style).
    """
    global kb_collection

    # Chroma pairs ids/documents/metadatas positionally, so a length mismatch
    # would silently associate documents with the wrong metadata (or raise
    # mid-insert). Refuse the whole batch up front instead.
    if len(json_texts) != len(metadatas):
        print(f"⚠️ KB init skipped: {len(json_texts)} texts vs {len(metadatas)} metadatas")
        return

    with kb_lock:
        try:
            # Create the persistent client only now (not at import time).
            chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
            kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
        except Exception as e:
            print("⚠️ Could not initialize persistent KB, using in-memory fallback:", e)
            chroma_client = chromadb.Client()  # fallback in-memory
            kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)

        # Clear existing entries safely: fetch all ids first and delete by id.
        try:
            if kb_collection.count() > 0:
                ids_to_delete = kb_collection.get()['ids']
                kb_collection.delete(ids=ids_to_delete)
        except Exception as e:
            print("⚠️ Could not clear KB:", e)

        # Nothing to add: embedding/adding an empty batch can raise in Chroma,
        # so stop here with the collection left (freshly) empty.
        if not json_texts:
            print("✅ KB initialized with 0 entries")
            return

        # Generate embeddings and populate the collection.
        try:
            vectors = embedding_func(json_texts)
            kb_collection.add(
                ids=[str(i) for i in range(len(json_texts))],
                embeddings=vectors,
                metadatas=metadatas,
                documents=json_texts
            )
            print(f"✅ KB initialized with {len(json_texts)} entries")
        except Exception as e:
            print("⚠️ Failed to populate KB:", e)
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
def query_kb(text: str, top_k: int = 1):
    """
    Query the knowledge base for relevant solutions.

    Parameters
    ----------
    text : str
        The free-text query to embed and search for.
    top_k : int
        Number of nearest neighbours to request from Chroma (only the best
        match is returned).

    Returns {"answer": str, "confidence": float, "metadata": dict}.
    Confidence is the cosine similarity between the query embedding and the
    best match's embedding when Chroma returns embeddings; otherwise it is
    derived from the reported distance (clamped to [0, 1]).
    """
    global kb_collection
    if not kb_collection:
        return {"answer": "⚠️ KB not initialized. Please call /setup first.", "confidence": 0.0, "metadata": {}}

    try:
        query_vector = embedding_func([text])[0]
        results = kb_collection.query(query_embeddings=[query_vector], n_results=top_k)
        if not results or not results.get("documents") or len(results["documents"][0]) == 0:
            return {"answer": "No relevant KB entry found.", "confidence": 0.0, "metadata": {}}

        best_doc = results["documents"][0][0]
        best_meta = results["metadatas"][0][0]

        # BUG FIX: Chroma's default `include` omits embeddings, in which case
        # results contains the key "embeddings" mapped to None — the previous
        # `"embeddings" in results` check passed and then crashed on indexing,
        # turning every query into an error. Also, falling back to the query
        # vector itself produced cosine(q, q) ≈ 1.0, a fake-perfect
        # confidence; use the reported distance instead when embeddings are
        # unavailable.
        embeddings = results.get("embeddings")
        if embeddings is not None and len(embeddings) > 0 and embeddings[0] is not None and len(embeddings[0]) > 0:
            best_vec = np.array(embeddings[0][0])
            # Small epsilon guards against division by zero for a zero-norm vector.
            confidence = float(
                np.dot(query_vector, best_vec)
                / (np.linalg.norm(query_vector) * np.linalg.norm(best_vec) + 1e-8)
            )
        else:
            distances = results.get("distances")
            if distances and distances[0]:
                # Map distance to a similarity-like score, clamped to [0, 1].
                confidence = max(0.0, min(1.0, 1.0 - float(distances[0][0])))
            else:
                # No embeddings and no distances: no basis for a score.
                confidence = 0.0

        return {"answer": best_doc, "confidence": round(confidence, 3), "metadata": best_meta}
    except Exception as e:
        print("⚠️ KB query failed:", e)
        return {"answer": "Error querying KB.", "confidence": 0.0, "metadata": {}}
|
| 168 |
+
|
| 169 |
|
| 170 |
# ==============================================================
|
| 171 |
# 🧠 GEMINI LLM (Official LangChain Integration)
|