Spaces:
Runtime error
Runtime error
import pandas as pd
from sentence_transformers import SentenceTransformer
import torch
from app.qdrant_client import client
from qdrant_client.http import models
from pympler import asizeof

# Start-up script: loads the embedding model and the ayah table, and makes
# sure the Qdrant collection holding the ayah embeddings exists and is
# populated. Everything runs at import time; `device`, `model`, `df` and
# `ayat` stay available as module-level names.

# Name of the Qdrant collection that stores one point per ayah.
COLLECTION_NAME = "ayahs_collection"
# Number of points sent per upsert request, so that no single request has
# to carry the whole corpus.
UPSERT_BATCH_SIZE = 256

# NOTE(review): the leading "β" in several messages below looks like a
# mis-encoded symbol; the strings are left untouched - confirm the intended
# character against the original file.
print("Loading model and data...")

# --- Setup device ---
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# --- Load model ---
model = SentenceTransformer("MossaabDev/Quran_embed_V2.2", device=device)
# NOTE(review): asizeof reports the size of Python-level objects; whether
# that includes the storage of the model weights should be confirmed before
# relying on this number.
print("Model size:", asizeof.asizeof(model))

# --- Load ayahs from ayas.csv ---
df = pd.read_csv("app/data/ayas.csv", encoding="utf-8")

# Both columns are required: 'answers' is embedded, 'arabic' is stored
# alongside it in the point payload.
if not {'answers', 'arabic'}.issubset(df.columns):
    raise ValueError("β 'ayas.csv' must contain 'answers' and 'arabic' columns.")

# Drop rows missing either column, then keep one row per distinct 'answers'.
df = df.dropna(subset=['answers', 'arabic']).drop_duplicates(subset=['answers'])
ayat = df['answers'].tolist()
print(f"Total unique ayat loaded: {len(ayat)}")
print("β Model and data ready.")

# --- Check if collection exists ---
collections = [c.name for c in client.get_collections().collections]
if COLLECTION_NAME not in collections:
    print("Creating Qdrant collection and uploading embeddings...")

    # Without at least one row there is no embedding to derive the vector
    # size from; fail with a clear message instead of an IndexError.
    if not ayat:
        raise ValueError("'ayas.csv' contains no usable rows; nothing to embed.")

    embeddings = model.encode(ayat, convert_to_tensor=False, show_progress_bar=True).tolist()

    # Taken from the same filtered frame as `ayat`, so position i in both
    # lists refers to the same row.
    arabic_texts = df['arabic'].tolist()
    points = [
        models.PointStruct(
            id=idx,
            vector=emb,
            payload={
                "text": ayah,
                "arabic": arabic,
            },
        )
        for idx, (emb, ayah, arabic) in enumerate(zip(embeddings, ayat, arabic_texts))
    ]

    # The collection is known not to exist at this point, so a plain create
    # is enough (no drop-and-recreate needed).
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=models.VectorParams(
            size=len(embeddings[0]),
            distance=models.Distance.COSINE,
        ),
    )

    try:
        for start in range(0, len(points), UPSERT_BATCH_SIZE):
            client.upsert(
                collection_name=COLLECTION_NAME,
                points=points[start:start + UPSERT_BATCH_SIZE],
            )
    except Exception:
        # A partially filled collection would pass the existence check on
        # the next start-up and the upload would be skipped for good, so
        # remove it before propagating the error.
        client.delete_collection(collection_name=COLLECTION_NAME)
        raise

    print("β Embeddings uploaded to Qdrant.")
else:
    print("β Collection already exists, skipping upload.")