Spaces:
Running
Running
Crcs1225
commited on
Commit
Β·
c135be2
1
Parent(s):
1333c38
hey
Browse files- database.py +53 -10
- generate_embeddings.py +49 -0
- main.py +26 -133
database.py
CHANGED
|
@@ -14,18 +14,18 @@ class Database:
|
|
| 14 |
self.client = motor.motor_asyncio.AsyncIOMotorClient(settings.MONGODB_URI)
|
| 15 |
self.db = self.client[settings.DATABASE_NAME]
|
| 16 |
self.collection = self.db[settings.COLLECTION_NAME]
|
|
|
|
| 17 |
|
| 18 |
async def similarity_search(self, query_embedding: List[float], limit: int = 3) -> List[Dict]:
|
| 19 |
-
"""Search for similar products using
|
| 20 |
try:
|
| 21 |
-
# First try vector search if index exists
|
| 22 |
pipeline = [
|
| 23 |
{
|
| 24 |
"$vectorSearch": {
|
| 25 |
-
"index": "vector_index", #
|
| 26 |
"path": "embedding",
|
| 27 |
"queryVector": query_embedding,
|
| 28 |
-
"numCandidates":
|
| 29 |
"limit": limit
|
| 30 |
}
|
| 31 |
},
|
|
@@ -46,7 +46,7 @@ class Database:
|
|
| 46 |
async for doc in cursor:
|
| 47 |
results.append({
|
| 48 |
"id": str(doc["_id"]),
|
| 49 |
-
"content":
|
| 50 |
"source": doc.get('title', 'product_database'),
|
| 51 |
"metadata": {
|
| 52 |
"category": doc.get('category', 'N/A'),
|
|
@@ -56,12 +56,45 @@ class Database:
|
|
| 56 |
})
|
| 57 |
return results
|
| 58 |
except Exception as e:
|
| 59 |
-
print(f"Vector search
|
| 60 |
-
# Fallback to text search
|
| 61 |
-
return await self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
async def search_by_category(self, category: str, limit: int = 5) -> List[Dict]:
|
| 64 |
-
"""Search products by category
|
| 65 |
cursor = self.collection.find(
|
| 66 |
{"category": {"$regex": category, "$options": "i"}}
|
| 67 |
).limit(limit)
|
|
@@ -70,7 +103,7 @@ class Database:
|
|
| 70 |
async for doc in cursor:
|
| 71 |
results.append({
|
| 72 |
"id": str(doc["_id"]),
|
| 73 |
-
"content":
|
| 74 |
"source": doc.get('title', 'product_database'),
|
| 75 |
"metadata": {
|
| 76 |
"category": doc.get('category', 'N/A'),
|
|
@@ -83,6 +116,16 @@ class Database:
|
|
| 83 |
"""Insert documents into the collection"""
|
| 84 |
result = await self.collection.insert_many(documents)
|
| 85 |
return [str(id) for id in result.inserted_ids]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
# Global database instance
|
| 88 |
db = Database()
|
|
|
|
| 14 |
self.client = motor.motor_asyncio.AsyncIOMotorClient(settings.MONGODB_URI)
|
| 15 |
self.db = self.client[settings.DATABASE_NAME]
|
| 16 |
self.collection = self.db[settings.COLLECTION_NAME]
|
| 17 |
+
print(f"β
Connected to MongoDB: {settings.DATABASE_NAME}.{settings.COLLECTION_NAME}")
|
| 18 |
|
| 19 |
async def similarity_search(self, query_embedding: List[float], limit: int = 3) -> List[Dict]:
|
| 20 |
+
"""Search for similar products using MongoDB Atlas Vector Search"""
|
| 21 |
try:
|
|
|
|
| 22 |
pipeline = [
|
| 23 |
{
|
| 24 |
"$vectorSearch": {
|
| 25 |
+
"index": "vector_index", # Make sure this matches your Atlas index name
|
| 26 |
"path": "embedding",
|
| 27 |
"queryVector": query_embedding,
|
| 28 |
+
"numCandidates": 150,
|
| 29 |
"limit": limit
|
| 30 |
}
|
| 31 |
},
|
|
|
|
| 46 |
async for doc in cursor:
|
| 47 |
results.append({
|
| 48 |
"id": str(doc["_id"]),
|
| 49 |
+
"content": self._create_product_content(doc),
|
| 50 |
"source": doc.get('title', 'product_database'),
|
| 51 |
"metadata": {
|
| 52 |
"category": doc.get('category', 'N/A'),
|
|
|
|
| 56 |
})
|
| 57 |
return results
|
| 58 |
except Exception as e:
|
| 59 |
+
print(f"β Vector search error: {e}")
|
| 60 |
+
# Fallback to text search
|
| 61 |
+
return await self.search_by_text("tops", limit)
|
| 62 |
+
|
| 63 |
+
def _create_product_content(self, doc: Dict) -> str:
|
| 64 |
+
"""Create formatted product content for the LLM"""
|
| 65 |
+
content_parts = [
|
| 66 |
+
f"Product: {doc.get('title', 'N/A')}",
|
| 67 |
+
f"Description: {doc.get('product_description', 'N/A')}",
|
| 68 |
+
f"Category: {doc.get('category', 'N/A')}",
|
| 69 |
+
f"Price: βΉ{doc.get('final_price', 'N/A')}"
|
| 70 |
+
]
|
| 71 |
+
return ". ".join(content_parts)
|
| 72 |
+
|
| 73 |
+
async def search_by_text(self, query: str, limit: int = 5) -> List[Dict]:
|
| 74 |
+
"""Fallback text search if vector search fails"""
|
| 75 |
+
cursor = self.collection.find({
|
| 76 |
+
"$or": [
|
| 77 |
+
{"title": {"$regex": query, "$options": "i"}},
|
| 78 |
+
{"category": {"$regex": query, "$options": "i"}},
|
| 79 |
+
{"product_description": {"$regex": query, "$options": "i"}}
|
| 80 |
+
]
|
| 81 |
+
}).limit(limit)
|
| 82 |
+
|
| 83 |
+
results = []
|
| 84 |
+
async for doc in cursor:
|
| 85 |
+
results.append({
|
| 86 |
+
"id": str(doc["_id"]),
|
| 87 |
+
"content": self._create_product_content(doc),
|
| 88 |
+
"source": doc.get('title', 'product_database'),
|
| 89 |
+
"metadata": {
|
| 90 |
+
"category": doc.get('category', 'N/A'),
|
| 91 |
+
"price": doc.get('final_price', 'N/A')
|
| 92 |
+
}
|
| 93 |
+
})
|
| 94 |
+
return results
|
| 95 |
|
| 96 |
async def search_by_category(self, category: str, limit: int = 5) -> List[Dict]:
|
| 97 |
+
"""Search products by category"""
|
| 98 |
cursor = self.collection.find(
|
| 99 |
{"category": {"$regex": category, "$options": "i"}}
|
| 100 |
).limit(limit)
|
|
|
|
| 103 |
async for doc in cursor:
|
| 104 |
results.append({
|
| 105 |
"id": str(doc["_id"]),
|
| 106 |
+
"content": self._create_product_content(doc),
|
| 107 |
"source": doc.get('title', 'product_database'),
|
| 108 |
"metadata": {
|
| 109 |
"category": doc.get('category', 'N/A'),
|
|
|
|
| 116 |
"""Insert documents into the collection"""
|
| 117 |
result = await self.collection.insert_many(documents)
|
| 118 |
return [str(id) for id in result.inserted_ids]
|
| 119 |
+
|
| 120 |
+
async def get_collection_stats(self):
|
| 121 |
+
"""Get collection statistics"""
|
| 122 |
+
total_docs = await self.collection.count_documents({})
|
| 123 |
+
docs_with_embeddings = await self.collection.count_documents({"embedding": {"$exists": True}})
|
| 124 |
+
return {
|
| 125 |
+
"total_documents": total_docs,
|
| 126 |
+
"documents_with_embeddings": docs_with_embeddings,
|
| 127 |
+
"embedding_coverage": f"{(docs_with_embeddings/total_docs*100):.1f}%" if total_docs > 0 else "0%"
|
| 128 |
+
}
|
| 129 |
|
| 130 |
# Global database instance
|
| 131 |
db = Database()
|
generate_embeddings.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import time
|
| 3 |
+
from database import db
|
| 4 |
+
from rag_system import rag_pipeline
|
| 5 |
+
|
| 6 |
+
def build_content_string(doc: dict) -> str:
|
| 7 |
+
parts = []
|
| 8 |
+
if doc.get("title"):
|
| 9 |
+
parts.append(f"Title: {doc['title']}")
|
| 10 |
+
if doc.get("product_description"):
|
| 11 |
+
parts.append(f"Description: {doc['product_description']}")
|
| 12 |
+
if doc.get("category"):
|
| 13 |
+
parts.append(f"Category: {doc['category']}")
|
| 14 |
+
|
| 15 |
+
for key, value in doc.items():
|
| 16 |
+
if key in ["_id", "embedding", "title", "product_description", "category"]:
|
| 17 |
+
continue
|
| 18 |
+
if isinstance(value, (str, int, float)):
|
| 19 |
+
parts.append(f"{key}: {value}")
|
| 20 |
+
|
| 21 |
+
return ". ".join(str(p) for p in parts if p)
|
| 22 |
+
|
| 23 |
+
async def generate_and_store_embeddings():
|
| 24 |
+
await db.connect()
|
| 25 |
+
cursor = db.collection.find({"embedding": {"$exists": False}})
|
| 26 |
+
updated_count = 0
|
| 27 |
+
batch_size = 20
|
| 28 |
+
|
| 29 |
+
async for doc in cursor:
|
| 30 |
+
try:
|
| 31 |
+
content = build_content_string(doc)
|
| 32 |
+
if content.strip():
|
| 33 |
+
embedding = await rag_pipeline.get_embeddings([content])
|
| 34 |
+
await db.collection.update_one(
|
| 35 |
+
{"_id": doc["_id"]},
|
| 36 |
+
{"$set": {"embedding": embedding[0]}}
|
| 37 |
+
)
|
| 38 |
+
updated_count += 1
|
| 39 |
+
if updated_count % 10 == 0:
|
| 40 |
+
print(f"β
Processed {updated_count} documents...")
|
| 41 |
+
except Exception as e:
|
| 42 |
+
print(f"β Error processing {doc.get('_id')}: {e}")
|
| 43 |
+
continue
|
| 44 |
+
time.sleep(0.2) # small delay to avoid overload
|
| 45 |
+
|
| 46 |
+
print(f"π Embedding generation completed! {updated_count} documents updated.")
|
| 47 |
+
|
| 48 |
+
if __name__ == "__main__":
|
| 49 |
+
asyncio.run(generate_and_store_embeddings())
|
main.py
CHANGED
|
@@ -32,30 +32,22 @@ embeddings_generated = False
|
|
| 32 |
|
| 33 |
@app.on_event("startup")
|
| 34 |
async def startup_event():
|
| 35 |
-
"""Run on application startup"""
|
| 36 |
-
global embeddings_generated
|
| 37 |
try:
|
| 38 |
print("π Starting RAG Chatbot API...")
|
| 39 |
|
| 40 |
# Initialize database connection
|
| 41 |
await db.connect()
|
| 42 |
|
| 43 |
-
# Check
|
| 44 |
-
|
| 45 |
-
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
if total_docs > 0 and docs_with_embeddings == 0:
|
| 51 |
-
print("π No embeddings found. Starting automatic embedding generation...")
|
| 52 |
-
await generate_embeddings_on_startup()
|
| 53 |
-
embeddings_generated = True
|
| 54 |
-
elif docs_with_embeddings > 0:
|
| 55 |
-
print(f"β
Embeddings already exist for {docs_with_embeddings} documents")
|
| 56 |
-
embeddings_generated = True
|
| 57 |
else:
|
| 58 |
-
print("
|
| 59 |
|
| 60 |
print("β
RAG Chatbot API is ready!")
|
| 61 |
|
|
@@ -63,74 +55,6 @@ async def startup_event():
|
|
| 63 |
print(f"β Startup error: {e}")
|
| 64 |
raise
|
| 65 |
|
| 66 |
-
async def generate_embeddings_on_startup():
|
| 67 |
-
"""Generate embeddings for all documents on startup"""
|
| 68 |
-
try:
|
| 69 |
-
# Find all documents without embeddings
|
| 70 |
-
cursor = db.collection.find({"embedding": {"$exists": False}})
|
| 71 |
-
documents_without_embeddings = []
|
| 72 |
-
async for doc in cursor:
|
| 73 |
-
documents_without_embeddings.append(doc)
|
| 74 |
-
|
| 75 |
-
if not documents_without_embeddings:
|
| 76 |
-
print("β
All documents already have embeddings")
|
| 77 |
-
return
|
| 78 |
-
|
| 79 |
-
print(f"π Generating embeddings for {len(documents_without_embeddings)} documents...")
|
| 80 |
-
|
| 81 |
-
updated_count = 0
|
| 82 |
-
errors = 0
|
| 83 |
-
|
| 84 |
-
# Process in smaller batches to avoid timeout
|
| 85 |
-
batch_size = 50
|
| 86 |
-
for i in range(0, len(documents_without_embeddings), batch_size):
|
| 87 |
-
batch = documents_without_embeddings[i:i + batch_size]
|
| 88 |
-
|
| 89 |
-
for doc in batch:
|
| 90 |
-
try:
|
| 91 |
-
# Create meaningful content for embedding
|
| 92 |
-
content_parts = []
|
| 93 |
-
|
| 94 |
-
# Include all relevant text fields
|
| 95 |
-
if doc.get('title'):
|
| 96 |
-
content_parts.append(f"Product: {doc['title']}")
|
| 97 |
-
if doc.get('product_description'):
|
| 98 |
-
content_parts.append(f"Description: {doc['product_description']}")
|
| 99 |
-
if doc.get('category'):
|
| 100 |
-
content_parts.append(f"Category: {doc['category']}")
|
| 101 |
-
|
| 102 |
-
content = ". ".join(content_parts)
|
| 103 |
-
|
| 104 |
-
if content.strip():
|
| 105 |
-
# Generate embedding
|
| 106 |
-
embedding = await rag_pipeline.get_embeddings([content])
|
| 107 |
-
|
| 108 |
-
# Update document with embedding
|
| 109 |
-
await db.collection.update_one(
|
| 110 |
-
{"_id": doc["_id"]},
|
| 111 |
-
{"$set": {"embedding": embedding[0]}}
|
| 112 |
-
)
|
| 113 |
-
updated_count += 1
|
| 114 |
-
|
| 115 |
-
# Progress update every 50 documents
|
| 116 |
-
if updated_count % 50 == 0:
|
| 117 |
-
print(f"β
Processed {updated_count}/{len(documents_without_embeddings)} documents...")
|
| 118 |
-
|
| 119 |
-
except Exception as e:
|
| 120 |
-
errors += 1
|
| 121 |
-
print(f"β Error processing document: {e}")
|
| 122 |
-
continue
|
| 123 |
-
|
| 124 |
-
# Small delay between batches
|
| 125 |
-
await asyncio.sleep(1)
|
| 126 |
-
|
| 127 |
-
# Final status
|
| 128 |
-
final_with_embeddings = await db.collection.count_documents({"embedding": {"$exists": True}})
|
| 129 |
-
print(f"π Embedding generation completed! {final_with_embeddings} documents now have embeddings")
|
| 130 |
-
|
| 131 |
-
except Exception as e:
|
| 132 |
-
print(f"β Embedding generation failed: {e}")
|
| 133 |
-
|
| 134 |
@app.get("/")
|
| 135 |
async def root():
|
| 136 |
return {"message": "RAG Chatbot API is running!", "status": "healthy"}
|
|
@@ -171,66 +95,35 @@ async def chat_with_assistant(request: ChatRequest):
|
|
| 171 |
except Exception as e:
|
| 172 |
print(f"β Error in /chat endpoint: {traceback.format_exc()}")
|
| 173 |
raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
|
| 174 |
-
|
| 175 |
-
@app.get("/debug/vector-results")
|
| 176 |
-
async def debug_vector_results(query: str = "tops"):
|
| 177 |
-
"""See exactly what vector search returns"""
|
| 178 |
-
try:
|
| 179 |
-
# Get embeddings for the query
|
| 180 |
-
query_embedding = await rag_pipeline.get_embeddings([query])
|
| 181 |
-
print(f"π Testing vector search for: '{query}'")
|
| 182 |
-
print(f"π Embedding dimensions: {len(query_embedding[0])}")
|
| 183 |
-
|
| 184 |
-
# Perform vector search
|
| 185 |
-
results = await db.similarity_search(query_embedding[0], limit=5)
|
| 186 |
-
|
| 187 |
-
response_data = {
|
| 188 |
-
"query": query,
|
| 189 |
-
"embedding_dimensions": len(query_embedding[0]),
|
| 190 |
-
"results_found": len(results),
|
| 191 |
-
"raw_results": []
|
| 192 |
-
}
|
| 193 |
-
|
| 194 |
-
for i, doc in enumerate(results):
|
| 195 |
-
response_data["raw_results"].append({
|
| 196 |
-
"rank": i + 1,
|
| 197 |
-
"id": doc["id"],
|
| 198 |
-
"content": doc["content"],
|
| 199 |
-
"source": doc.get("source", "unknown"),
|
| 200 |
-
"metadata": doc.get("metadata", {})
|
| 201 |
-
})
|
| 202 |
-
print(f"π Result {i+1}: {doc['content'][:100]}...")
|
| 203 |
-
|
| 204 |
-
return response_data
|
| 205 |
-
|
| 206 |
-
except Exception as e:
|
| 207 |
-
return {"error": str(e), "traceback": traceback.format_exc()}
|
| 208 |
|
| 209 |
-
@app.get("/debug/
|
| 210 |
-
async def
|
| 211 |
-
"""Get
|
| 212 |
try:
|
| 213 |
-
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
| 215 |
async for doc in cursor:
|
| 216 |
-
|
| 217 |
"id": str(doc["_id"]),
|
| 218 |
-
"
|
| 219 |
"category": doc.get("category", "N/A"),
|
| 220 |
-
"description": doc.get("product_description", "N/A"),
|
| 221 |
-
"price": doc.get("final_price", "N/A"),
|
| 222 |
"has_embedding": "embedding" in doc,
|
| 223 |
-
"
|
| 224 |
-
|
| 225 |
-
|
| 226 |
|
| 227 |
return {
|
| 228 |
-
"
|
| 229 |
-
"
|
| 230 |
-
"
|
|
|
|
| 231 |
}
|
| 232 |
except Exception as e:
|
| 233 |
return {"error": str(e)}
|
|
|
|
| 234 |
|
| 235 |
if __name__ == "__main__":
|
| 236 |
uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
|
|
|
|
| 32 |
|
| 33 |
@app.on_event("startup")
|
| 34 |
async def startup_event():
|
| 35 |
+
"""Run on application startup - WITHOUT embedding generation"""
|
|
|
|
| 36 |
try:
|
| 37 |
print("π Starting RAG Chatbot API...")
|
| 38 |
|
| 39 |
# Initialize database connection
|
| 40 |
await db.connect()
|
| 41 |
|
| 42 |
+
# Check database status (but don't generate embeddings)
|
| 43 |
+
stats = await db.get_collection_stats()
|
| 44 |
+
print(f"π Database status: {stats}")
|
| 45 |
|
| 46 |
+
if stats["documents_with_embeddings"] == 0:
|
| 47 |
+
print("β οΈ No embeddings found in database. Please pre-compute embeddings separately.")
|
| 48 |
+
print("π‘ Run the embedding generation script locally and upload to MongoDB Atlas.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
else:
|
| 50 |
+
print(f"β
Ready! Using {stats['documents_with_embeddings']} documents with embeddings from MongoDB Atlas")
|
| 51 |
|
| 52 |
print("β
RAG Chatbot API is ready!")
|
| 53 |
|
|
|
|
| 55 |
print(f"β Startup error: {e}")
|
| 56 |
raise
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
@app.get("/")
|
| 59 |
async def root():
|
| 60 |
return {"message": "RAG Chatbot API is running!", "status": "healthy"}
|
|
|
|
| 95 |
except Exception as e:
|
| 96 |
print(f"β Error in /chat endpoint: {traceback.format_exc()}")
|
| 97 |
raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
+
@app.get("/debug/database-stats")
|
| 100 |
+
async def debug_database_stats():
|
| 101 |
+
"""Get detailed database statistics"""
|
| 102 |
try:
|
| 103 |
+
stats = await db.get_collection_stats()
|
| 104 |
+
|
| 105 |
+
# Sample some documents to see their structure
|
| 106 |
+
sample_docs = []
|
| 107 |
+
cursor = db.collection.find({"embedding": {"$exists": True}}).limit(3)
|
| 108 |
async for doc in cursor:
|
| 109 |
+
sample_docs.append({
|
| 110 |
"id": str(doc["_id"]),
|
| 111 |
+
"title": doc.get("title", "N/A"),
|
| 112 |
"category": doc.get("category", "N/A"),
|
|
|
|
|
|
|
| 113 |
"has_embedding": "embedding" in doc,
|
| 114 |
+
"embedding_length": len(doc.get("embedding", [])),
|
| 115 |
+
"content_preview": f"{doc.get('title', '')} - {doc.get('product_description', '')[:50]}..."
|
| 116 |
+
})
|
| 117 |
|
| 118 |
return {
|
| 119 |
+
"database": settings.DATABASE_NAME,
|
| 120 |
+
"collection": settings.COLLECTION_NAME,
|
| 121 |
+
"statistics": stats,
|
| 122 |
+
"sample_documents_with_embeddings": sample_docs
|
| 123 |
}
|
| 124 |
except Exception as e:
|
| 125 |
return {"error": str(e)}
|
| 126 |
+
|
| 127 |
|
| 128 |
if __name__ == "__main__":
|
| 129 |
uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)
|