Pulastya0 committed on
Commit
3e0cda0
·
verified ·
1 Parent(s): b61d8ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -39
app.py CHANGED
@@ -25,56 +25,62 @@ class SetupRequest(BaseModel):
25
  # -------------------------------
26
  # Persistent Chroma client
27
  # -------------------------------
 
28
  COLLECTION_NAME = "knowledge_base"
29
 
30
- # Initialize in-memory client
31
- chroma_client = chromadb.Client()
32
-
33
- # Create or get collection
34
- kb_collection: Collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
35
-
36
- print("✅ In-memory Knowledge Base initialized successfully.")
37
-
38
  # -------------------------------
39
  # KB Setup Endpoint
40
  # -------------------------------
41
  @app.post("/setup")
42
- async def setup_endpoint(kb_file: UploadFile = File(...)):
43
  """
44
- Upload KB JSON file and embed it in ChromaDB.
45
- JSON format: {"knowledge_base": [{"id": "1", "answer": "...", "category": "..."}]}
46
  """
47
- global kb_collection
48
- if not kb_collection:
49
- raise HTTPException(status_code=500, detail="Chroma KB not initialized")
50
 
51
  try:
52
- content = await kb_file.read()
53
- kb_data = json.loads(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  except Exception as e:
55
- raise HTTPException(status_code=400, detail=f"Failed to read JSON: {e}")
56
-
57
- if "knowledge_base" not in kb_data:
58
- raise HTTPException(status_code=400, detail="Invalid KB format, missing 'knowledge_base' key")
59
-
60
- # Clear existing entries before adding new KB
61
- kb_collection.delete(where={"id": {"$ne": None}}) # Delete all previous entries
62
-
63
- # Add KB entries
64
- for entry in kb_data["knowledge_base"]:
65
- kb_collection.add(
66
- documents=[entry["answer"]],
67
- metadatas=[{
68
- "id": entry["id"],
69
- "category": entry.get("category", ""),
70
- "question_variations": entry.get("question_variations", []),
71
- "keywords": entry.get("keywords", [])
72
- }],
73
- ids=[entry["id"]]
74
- )
75
-
76
- kb_collection.persist()
77
- return {"status": f"KB uploaded and stored successfully, {len(kb_data['knowledge_base'])} entries added"}
78
 
79
  # -------------------------------
80
  # Step-by-Step Endpoints
 
25
  # -------------------------------
26
  # Persistent Chroma client
27
  # -------------------------------
28
+ CHROMA_PATH = "/data/chroma"
29
  COLLECTION_NAME = "knowledge_base"
30
 
 
 
 
 
 
 
 
 
31
  # -------------------------------
32
  # KB Setup Endpoint
33
  # -------------------------------
34
  @app.post("/setup")
35
+ def setup_kb(json_file_path: str):
36
  """
37
+ Loads a JSON file, generates embeddings, and populates a persistent ChromaDB knowledge base.
38
+ Only runs when explicitly called.
39
  """
40
+ if not os.path.exists(json_file_path):
41
+ raise HTTPException(status_code=404, detail=f"File not found: {json_file_path}")
 
42
 
43
  try:
44
+ # Load JSON data
45
+ with open(json_file_path, "r", encoding="utf-8") as f:
46
+ data = json.load(f)
47
+
48
+ if not isinstance(data, list):
49
+ raise HTTPException(status_code=400, detail="JSON must contain a list of knowledge items.")
50
+
51
+ print(f"📘 Loaded {len(data)} items from {json_file_path}")
52
+
53
+ # Initialize encoder and Chroma client (only here)
54
+ encoder = SentenceTransformer("all-MiniLM-L6-v2")
55
+ chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
56
+ collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
57
+
58
+ # Clear existing data (optional)
59
+ if collection.count() > 0:
60
+ print(f"🧹 Clearing {collection.count()} existing records...")
61
+ collection.delete(ids=collection.get()['ids'])
62
+
63
+ # Prepare data for embeddings
64
+ texts, ids, metadatas = [], [], []
65
+ for i, item in enumerate(data):
66
+ content = item.get("content") or item.get("text") or ""
67
+ title = item.get("title") or f"Document {i+1}"
68
+ texts.append(f"{title}. {content}")
69
+ ids.append(str(i))
70
+ metadatas.append(item)
71
+
72
+ print("🧠 Generating embeddings...")
73
+ embeddings = encoder.encode(texts, show_progress_bar=True).tolist()
74
+
75
+ # Add to ChromaDB
76
+ print("💾 Adding to ChromaDB...")
77
+ collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas)
78
+ print(f"✅ Successfully added {collection.count()} records to {COLLECTION_NAME}.")
79
+
80
+ return {"message": "Knowledge base successfully initialized.", "count": collection.count()}
81
+
82
  except Exception as e:
83
+ raise HTTPException(status_code=500, detail=f"Setup failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  # -------------------------------
86
  # Step-by-Step Endpoints