Spaces:
Runtime error
Runtime error
| import os | |
| import uuid | |
| from dotenv import load_dotenv | |
| from sentence_transformers import SentenceTransformer | |
| from qdrant_client import QdrantClient | |
| from qdrant_client.http import models | |
| import uuid | |
| from datetime import datetime, timezone | |
# Pull settings from a local .env file (if any) into the process environment
# before they are read below.
load_dotenv()

# Connection settings for the Qdrant instance; either may be None when the
# corresponding environment variable is not set.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")

# Name of the single Qdrant collection this module reads from and writes to.
COLLECTION = "docs"

# Shared embedder (fast model) and Qdrant client, created once at import time.
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(api_key=QDRANT_API_KEY, url=QDRANT_URL)
def _ensure_collection():
    """Create the ``COLLECTION`` collection in Qdrant if it does not exist yet.

    An existing collection is left untouched, so re-importing this module
    never wipes stored points. Errors raised while talking to Qdrant (for
    example bad credentials or a network failure) propagate to the caller
    instead of being reported as a missing collection.
    """
    if qdrant.collection_exists(collection_name=COLLECTION):
        return

    print(f"⚠️ Collection '{COLLECTION}' not found. Creating fresh collection...")
    qdrant.create_collection(
        collection_name=COLLECTION,
        vectors_config=models.VectorParams(
            # Must equal the length of the vectors produced by ``model``.
            size=384,
            distance=models.Distance.COSINE,
        ),
    )


# Safe init: runs once at import time and only creates, never recreates.
_ensure_collection()
def embed_and_upsert(chunks, source="user", timestamp=None):
    """Embed text chunks and store them as points in the Qdrant collection.

    Each chunk becomes one point with a random UUID as its id and a payload
    holding the chunk text, ``source``, ``timestamp`` and the chunk's position
    in the input (``chunk_id``).

    Args:
        chunks: The texts to store. Any iterable of strings is accepted; a
            single bare string is treated as one chunk rather than being
            split into characters. ``None`` is treated as "no chunks".
        source: Label saved in each point's payload under ``"source"``.
        timestamp: Value saved under ``"timestamp"``. Defaults to the current
            UTC time in ISO-8601 format, shared by every chunk of this call.

    Returns:
        ``True`` once the points have been upserted, ``False`` when there was
        nothing to store.
    """
    # Normalise the input first: a lone str would otherwise be paired with
    # the embedding character by character, and a one-shot iterator would be
    # consumed by the encoder before the zip below could read it.
    if isinstance(chunks, str):
        chunks = [chunks]
    else:
        chunks = list(chunks) if chunks is not None else []

    if not chunks:
        # Nothing to embed: skip the encoder and avoid an empty upsert request.
        return False

    if timestamp is None:
        timestamp = datetime.now(timezone.utc).isoformat()

    embeddings = model.encode(chunks).tolist()
    points = [
        models.PointStruct(
            id=str(uuid.uuid4()),
            vector=emb,
            payload={
                "text": chunk,
                "source": source,
                "timestamp": timestamp,
                "chunk_id": i,
            },
        )
        for i, (chunk, emb) in enumerate(zip(chunks, embeddings))
    ]

    # wait=True is passed so the call does not return before Qdrant responds
    # to the write.
    qdrant.upsert(collection_name=COLLECTION, points=points, wait=True)
    print(f"✅ Stored {len(points)} chunks in Qdrant (source={source}, ts={timestamp})")
    return True
def search(query: str, top_k: int = 5) -> list:
    """Return the stored chunks most similar to ``query``.

    The query is embedded with the module-level ``model`` and looked up in
    the ``COLLECTION`` collection through the module-level ``qdrant`` client.

    Args:
        query: Free-text query to embed.
        top_k: Maximum number of hits requested from Qdrant.

    Returns:
        A list of dicts, one per hit in the order Qdrant returned them, with
        the keys ``chunk`` (stored text, ``""`` if absent), ``cosine`` (the
        hit's score), ``timestamp`` and ``source`` (``None`` if absent).
    """
    query_vector = model.encode([query])[0].tolist()
    hits = qdrant.search(
        collection_name=COLLECTION,
        query_vector=query_vector,
        limit=top_k,
        with_payload=True,
    )

    results = []
    for hit in hits:
        # ``payload`` is optional on a scored point (e.g. a point written
        # without one); fall back to an empty dict so ``.get`` cannot fail.
        payload = hit.payload or {}
        results.append(
            {
                "chunk": payload.get("text", ""),
                "cosine": hit.score,
                "timestamp": payload.get("timestamp"),
                "source": payload.get("source"),
            }
        )
    return results