Pulastya0 committed on
Commit
4376f5c
·
verified ·
1 Parent(s): 03e9d51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -25
app.py CHANGED
@@ -36,12 +36,8 @@ kb_collection = None
36
  @app.post("/setup")
37
  async def setup_kb(kb_file: UploadFile = File(...)):
38
  global kb_collection
39
- """
40
- Uploads a JSON file, generates embeddings, and populates a persistent ChromaDB knowledge base.
41
- Only runs when explicitly called.
42
- """
43
  try:
44
- # Read JSON directly from the uploaded file
45
  content_bytes = await kb_file.read()
46
  data = json.loads(content_bytes)
47
 
@@ -50,38 +46,42 @@ async def setup_kb(kb_file: UploadFile = File(...)):
50
 
51
  print(f"📘 Loaded {len(data)} items from uploaded file '{kb_file.filename}'")
52
 
53
- # Initialize encoder and Chroma client (only here)
54
  encoder = SentenceTransformer("all-MiniLM-L6-v2")
55
- chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
56
- kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
57
- collection = kb_collection
58
 
59
- # Clear existing data (optional)
60
- if collection.count() > 0:
61
- print(f"🧹 Clearing {collection.count()} existing records...")
62
- collection.delete(ids=collection.get()['ids'])
 
 
 
 
63
 
64
- # Prepare data for embeddings
 
 
 
 
65
  texts, ids, metadatas = [], [], []
66
  for i, item in enumerate(data):
67
- content = item.get("content") or item.get("text") or ""
68
- title = item.get("title") or f"Document {i+1}"
69
  texts.append(f"{title}. {content}")
70
  ids.append(str(i))
71
- metadatas.append(item)
 
 
72
 
73
- print("🧠 Generating embeddings...")
74
  embeddings = encoder.encode(texts, show_progress_bar=True).tolist()
75
 
76
- # Add to ChromaDB
77
- print("💾 Adding to ChromaDB...")
78
- collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas)
79
- print(f"✅ Successfully added {collection.count()} records to {COLLECTION_NAME}.")
80
 
81
- return {"message": "Knowledge base successfully initialized.", "count": collection.count()}
82
 
83
- except json.JSONDecodeError:
84
- raise HTTPException(status_code=400, detail="Invalid JSON file.")
85
  except Exception as e:
86
  raise HTTPException(status_code=500, detail=f"Setup failed: {e}")
87
 
 
36
  @app.post("/setup")
37
  async def setup_kb(kb_file: UploadFile = File(...)):
38
  global kb_collection
 
 
 
 
39
  try:
40
+ # Read JSON directly
41
  content_bytes = await kb_file.read()
42
  data = json.loads(content_bytes)
43
 
 
46
 
47
  print(f"📘 Loaded {len(data)} items from uploaded file '{kb_file.filename}'")
48
 
49
+ # Initialize Sentence Transformer
50
  encoder = SentenceTransformer("all-MiniLM-L6-v2")
 
 
 
51
 
52
+ # Initialize ChromaDB (persistent)
53
+ chroma_client = chromadb.PersistentClient(path="/tmp/chroma") # must be writable
54
+ kb_collection = chroma_client.get_or_create_collection(
55
+ name="knowledge_base",
56
+ embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
57
+ model_name="all-MiniLM-L6-v2"
58
+ )
59
+ )
60
 
61
+ # Clear old data
62
+ if kb_collection.count() > 0:
63
+ kb_collection.delete(ids=kb_collection.get()['ids'])
64
+
65
+ # Prepare data
66
  texts, ids, metadatas = [], [], []
67
  for i, item in enumerate(data):
68
+ content = item.get("answer") or item.get("content") or ""
69
+ title = item.get("id") or f"Document {i+1}"
70
  texts.append(f"{title}. {content}")
71
  ids.append(str(i))
72
+ # Stringify list values for metadata; all other value types are passed through unchanged
73
+ metadata = {k: str(v) if isinstance(v, list) else v for k, v in item.items()}
74
+ metadatas.append(metadata)
75
 
76
+ # Generate embeddings
77
  embeddings = encoder.encode(texts, show_progress_bar=True).tolist()
78
 
79
+ # Add to Chroma
80
+ kb_collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas)
81
+ print(f"✅ Successfully added {kb_collection.count()} records.")
 
82
 
83
+ return {"message": "Knowledge base successfully initialized.", "count": kb_collection.count()}
84
 
 
 
85
  except Exception as e:
86
  raise HTTPException(status_code=500, detail=f"Setup failed: {e}")
87