# GraphRAG-Live / vec.py
# aayush226's picture
# Upload 10 files
# 4d9fcca verified
import os
import uuid
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http import models
import uuid
from datetime import datetime, timezone
# Load variables from a .env file into the process environment before they
# are read below.
load_dotenv()
# Qdrant connection settings taken from the environment; os.getenv returns
# None for a variable that is not set.
QDRANT_URL = os.getenv("QDRANT_URL")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
# Name of the Qdrant collection used by the functions below.
COLLECTION = "docs"
# Shared sentence-embedding model used for both indexing and querying.
model = SentenceTransformer("all-MiniLM-L6-v2")  # fast embedder
# Shared Qdrant client built from the settings above.
qdrant = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
# Ensure the collection exists (safe init: existing data is never wiped on reload).
def _ensure_collection():
    """Create the ``COLLECTION`` collection in Qdrant if it does not exist yet.

    An existing collection is left untouched. Only a "not found" (HTTP 404)
    answer to the lookup triggers creation; any other failure (unreachable
    server, rejected credentials, ...) propagates to the caller instead of
    being misreported as a missing collection.

    Raises:
        UnexpectedResponse: If the lookup fails with a status other than 404.
    """
    # Imported locally so this function does not rely on a new file-level import.
    from qdrant_client.http.exceptions import UnexpectedResponse

    try:
        qdrant.get_collection(collection_name=COLLECTION)
    except UnexpectedResponse as exc:
        if exc.status_code != 404:
            # Not a "missing collection" answer: surface the real problem.
            raise
    else:
        # The collection is already there; keep its contents as they are.
        return

    print(f"⚠️ Collection '{COLLECTION}' not found. Creating fresh collection...")
    qdrant.create_collection(
        collection_name=COLLECTION,
        vectors_config=models.VectorParams(
            # Must equal the embedding width of the model used for encoding
            # (384 for all-MiniLM-L6-v2).
            size=384,
            distance=models.Distance.COSINE,
        ),
    )
# Executed when the module is imported, so the collection is checked (and
# created if needed) before embed_and_upsert() or search() can be called.
_ensure_collection()
def embed_and_upsert(chunks, source="user", timestamp=None):
    """Embed text chunks and store them as points in the Qdrant collection.

    Every chunk is stored under a fresh random UUID with a payload holding
    its text, the ``source`` label, the ``timestamp`` and its position
    (``chunk_id``) within this call.

    Args:
        chunks: Text chunks to index. A single string is treated as one
            chunk; any other iterable (including a one-shot iterator) is
            materialized into a list first.
        source: Label stored in each point's payload.
        timestamp: Value stored in each point's payload. Defaults to the
            current UTC time in ISO 8601 format.

    Returns:
        True once the points have been upserted; False when ``chunks`` is
        empty and nothing was sent to Qdrant.
    """
    # A bare string would otherwise be zipped character by character against
    # the floats of a single embedding.
    if isinstance(chunks, str):
        chunks = [chunks]
    else:
        # Materialize so the input can be traversed twice (encode + zip).
        chunks = list(chunks)

    if not chunks:
        # Avoid issuing an upsert request that carries no points.
        print("⚠️ No chunks to store; skipping Qdrant upsert.")
        return False

    if timestamp is None:
        timestamp = datetime.now(timezone.utc).isoformat()

    embeddings = model.encode(chunks).tolist()
    points = [
        models.PointStruct(
            id=str(uuid.uuid4()),
            vector=embedding,
            payload={
                "text": chunk,
                "source": source,
                "timestamp": timestamp,
                "chunk_id": index,
            },
        )
        for index, (chunk, embedding) in enumerate(zip(chunks, embeddings))
    ]

    # wait=True blocks until Qdrant has applied the write.
    qdrant.upsert(collection_name=COLLECTION, points=points, wait=True)
    print(f"✅ Stored {len(points)} chunks in Qdrant (source={source}, ts={timestamp})")
    return True
def search(query: str, top_k: int = 5):
    """Return the stored chunks most similar to ``query``.

    Args:
        query: Text to embed and look up in the collection.
        top_k: Maximum number of hits to return. A value of zero or less
            yields an empty list without contacting the model or Qdrant.

    Returns:
        A list of dicts, one per hit in the order Qdrant returned them, with
        the keys ``"chunk"`` (stored text, ``""`` if absent), ``"cosine"``
        (the hit's score), ``"timestamp"`` and ``"source"`` (``None`` if
        absent).
    """
    if top_k <= 0:
        # Nothing was asked for; skip the embedding and the network round trip.
        return []

    query_vector = model.encode([query])[0].tolist()
    hits = qdrant.search(
        collection_name=COLLECTION,
        query_vector=query_vector,
        limit=top_k,
        with_payload=True,
    )

    results = []
    for hit in hits:
        # A point stored without a payload comes back with payload=None.
        payload = hit.payload or {}
        results.append(
            {
                "chunk": payload.get("text", ""),
                "cosine": hit.score,
                "timestamp": payload.get("timestamp"),
                "source": payload.get("source"),
            }
        )
    return results