import os
import uuid
from datetime import datetime, timezone

from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http import models

load_dotenv()

QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
COLLECTION = "docs"

# Fast, lightweight embedder (384-dimensional output).
model = SentenceTransformer("all-MiniLM-L6-v2")
qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)


def _ensure_collection():
    """Ensure the collection exists (safe init: we don't wipe it on reload)."""
    try:
        qdrant.get_collection(collection_name=COLLECTION)
    except Exception:
        print(f"⚠️ Collection '{COLLECTION}' not found. Creating fresh collection...")
        qdrant.create_collection(
            collection_name=COLLECTION,
            vectors_config=models.VectorParams(
                size=384,  # matches the all-MiniLM-L6-v2 embedding dimension
                distance=models.Distance.COSINE,
            ),
        )


_ensure_collection()


def embed_and_upsert(chunks, source="user", timestamp=None):
    """Embed text chunks and upsert them into Qdrant with metadata payloads."""
    if timestamp is None:
        timestamp = datetime.now(timezone.utc).isoformat()

    embeddings = model.encode(chunks).tolist()

    points = []
    for i, (chunk, emb) in enumerate(zip(chunks, embeddings)):
        points.append(
            models.PointStruct(
                id=str(uuid.uuid4()),
                vector=emb,
                payload={
                    "text": chunk,
                    "source": source,
                    "timestamp": timestamp,
                    "chunk_id": i,
                },
            )
        )

    qdrant.upsert(collection_name=COLLECTION, points=points, wait=True)
    print(f"✅ Stored {len(points)} chunks in Qdrant (source={source}, ts={timestamp})")
    return True


def search(query: str, top_k: int = 5):
    """Embed the query and return the top_k most similar chunks with their metadata."""
    q_emb = model.encode([query])[0].tolist()
    results = qdrant.search(
        collection_name=COLLECTION,
        query_vector=q_emb,
        limit=top_k,
        with_payload=True,
    )
    return [
        {
            "chunk": r.payload.get("text", ""),
            "cosine": r.score,
            "timestamp": r.payload.get("timestamp"),
            "source": r.payload.get("source"),
        }
        for r in results
    ]
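

# --- Usage sketch (illustrative addition, not part of the original module) ---
# A minimal example of how embed_and_upsert() and search() above might be
# called together. The sample chunks and the query string are assumptions for
# demonstration only; QDRANT_URL and QDRANT_API_KEY must be set in .env.
if __name__ == "__main__":
    sample_chunks = [
        "Qdrant is a vector database for similarity search.",
        "all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings.",
    ]
    embed_and_upsert(sample_chunks, source="demo")

    for hit in search("What does Qdrant do?", top_k=2):
        print(f"{hit['cosine']:.3f}  {hit['chunk']}  (source={hit['source']})")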