Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -36,12 +36,8 @@ kb_collection = None
|
|
| 36 |
@app.post("/setup")
|
| 37 |
async def setup_kb(kb_file: UploadFile = File(...)):
|
| 38 |
global kb_collection
|
| 39 |
-
"""
|
| 40 |
-
Uploads a JSON file, generates embeddings, and populates a persistent ChromaDB knowledge base.
|
| 41 |
-
Only runs when explicitly called.
|
| 42 |
-
"""
|
| 43 |
try:
|
| 44 |
-
# Read JSON directly
|
| 45 |
content_bytes = await kb_file.read()
|
| 46 |
data = json.loads(content_bytes)
|
| 47 |
|
|
@@ -50,38 +46,42 @@ async def setup_kb(kb_file: UploadFile = File(...)):
|
|
| 50 |
|
| 51 |
print(f"📘 Loaded {len(data)} items from uploaded file '{kb_file.filename}'")
|
| 52 |
|
| 53 |
-
# Initialize
|
| 54 |
encoder = SentenceTransformer("all-MiniLM-L6-v2")
|
| 55 |
-
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
|
| 56 |
-
kb_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
|
| 57 |
-
collection = kb_collection
|
| 58 |
|
| 59 |
-
#
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
texts, ids, metadatas = [], [], []
|
| 66 |
for i, item in enumerate(data):
|
| 67 |
-
content = item.get("
|
| 68 |
-
title = item.get("
|
| 69 |
texts.append(f"{title}. {content}")
|
| 70 |
ids.append(str(i))
|
| 71 |
-
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
embeddings = encoder.encode(texts, show_progress_bar=True).tolist()
|
| 75 |
|
| 76 |
-
# Add to
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
print(f"✅ Successfully added {collection.count()} records to {COLLECTION_NAME}.")
|
| 80 |
|
| 81 |
-
return {"message": "Knowledge base successfully initialized.", "count":
|
| 82 |
|
| 83 |
-
except json.JSONDecodeError:
|
| 84 |
-
raise HTTPException(status_code=400, detail="Invalid JSON file.")
|
| 85 |
except Exception as e:
|
| 86 |
raise HTTPException(status_code=500, detail=f"Setup failed: {e}")
|
| 87 |
|
|
|
|
| 36 |
@app.post("/setup")
|
| 37 |
async def setup_kb(kb_file: UploadFile = File(...)):
|
| 38 |
global kb_collection
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
try:
|
| 40 |
+
# Read JSON directly
|
| 41 |
content_bytes = await kb_file.read()
|
| 42 |
data = json.loads(content_bytes)
|
| 43 |
|
|
|
|
| 46 |
|
| 47 |
print(f"📘 Loaded {len(data)} items from uploaded file '{kb_file.filename}'")
|
| 48 |
|
| 49 |
+
# Initialize Sentence Transformer
|
| 50 |
encoder = SentenceTransformer("all-MiniLM-L6-v2")
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
# Initialize ChromaDB (persistent)
|
| 53 |
+
chroma_client = chromadb.PersistentClient(path="/tmp/chroma") # must be writable
|
| 54 |
+
kb_collection = chroma_client.get_or_create_collection(
|
| 55 |
+
name="knowledge_base",
|
| 56 |
+
embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
|
| 57 |
+
model_name="all-MiniLM-L6-v2"
|
| 58 |
+
)
|
| 59 |
+
)
|
| 60 |
|
| 61 |
+
# Clear old data
|
| 62 |
+
if kb_collection.count() > 0:
|
| 63 |
+
kb_collection.delete(ids=kb_collection.get()['ids'])
|
| 64 |
+
|
| 65 |
+
# Prepare data
|
| 66 |
texts, ids, metadatas = [], [], []
|
| 67 |
for i, item in enumerate(data):
|
| 68 |
+
content = item.get("answer") or item.get("content") or ""
|
| 69 |
+
title = item.get("id") or f"Document {i+1}"
|
| 70 |
texts.append(f"{title}. {content}")
|
| 71 |
ids.append(str(i))
|
| 72 |
+
# Convert list values to strings for metadata; every other value (including dicts and None) is passed through unchanged
|
| 73 |
+
metadata = {k: str(v) if isinstance(v, list) else v for k, v in item.items()}
|
| 74 |
+
metadatas.append(metadata)
|
| 75 |
|
| 76 |
+
# Generate embeddings
|
| 77 |
embeddings = encoder.encode(texts, show_progress_bar=True).tolist()
|
| 78 |
|
| 79 |
+
# Add to Chroma
|
| 80 |
+
kb_collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas)
|
| 81 |
+
print(f"✅ Successfully added {kb_collection.count()} records.")
|
|
|
|
| 82 |
|
| 83 |
+
return {"message": "Knowledge base successfully initialized.", "count": kb_collection.count()}
|
| 84 |
|
|
|
|
|
|
|
| 85 |
except Exception as e:
|
| 86 |
raise HTTPException(status_code=500, detail=f"Setup failed: {e}")
|
| 87 |
|