Spaces:
Sleeping
Sleeping
# Build a local Qdrant vector store from a CSV of math problems.
#
# The script reads the dataset, embeds the "problem" column with a
# SentenceTransformer model, and uploads the vectors (with every CSV row as
# payload) into the "math_problems" collection stored under `qdrant_data/`.
# Everything runs at module top level, so executing the file performs the
# whole build.

import os
import uuid

import pandas as pd
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

# === Step 1: Ensure Qdrant directory exists ===
# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory between the check and the mkdir.
os.makedirs("qdrant_data", exist_ok=True)

# === Step 2: Load dataset ===
data = pd.read_csv("math_dataset (2).csv")  # Ensure this CSV is present and formatted correctly

# === Step 3: Encode questions ===
# NOTE(review): the E5 model card recommends prefixing inputs with "query: "
# or "passage: ". No prefix is added here; confirm that whatever code queries
# this collection embeds its inputs the same way before changing this.
embedding_model = SentenceTransformer("intfloat/e5-large")
vectors = embedding_model.encode(data["problem"].tolist(), show_progress_bar=True)

# === Step 4: Initialize local Qdrant client ===
client = QdrantClient(path="qdrant_data")

# === Step 5: Create collection (drop any previous one so it's fresh) ===
collection_name = "math_problems"
# `recreate_collection` is deprecated in qdrant-client; an explicit delete
# followed by create is the same drop-and-create sequence.
client.delete_collection(collection_name=collection_name)
client.create_collection(
    collection_name=collection_name,
    # The vector size is taken from the second dimension of the encoded array.
    vectors_config=models.VectorParams(
        size=vectors.shape[1],
        distance=models.Distance.COSINE,
    ),
)

# === Step 6: Prepare payload and upload with UUIDs ===
# One payload dict per CSV row, and one freshly generated random UUID per vector.
payload = data.to_dict(orient="records")
ids = [str(uuid.uuid4()) for _ in range(len(vectors))]
client.upload_collection(
    collection_name=collection_name,
    vectors=vectors,
    payload=payload,
    ids=ids,
)

print("β Qdrant vector store created and populated successfully in `qdrant_data/`.")