Spaces:
Runtime error
Runtime error
File size: 1,958 Bytes
38bac22 7a1f92c 38bac22 7a1f92c 38bac22 7a1f92c eebabd3 7a1f92c 38bac22 7a1f92c 38bac22 7a1f92c 38bac22 7a1f92c 38bac22 7a1f92c 38bac22 7a1f92c 38bac22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import pandas as pd
from sentence_transformers import SentenceTransformer
import torch
from app.qdrant_client import client
from qdrant_client.http import models
from pympler import asizeof
# Startup script: loads the embedding model and the ayah dataset, then makes
# sure the Qdrant collection holding the ayah embeddings exists, creating and
# filling it on first run.
print("Loading model and data...")

# Name of the Qdrant collection that stores one point per unique ayah.
COLLECTION_NAME = "ayahs_collection"

# Number of points sent per upsert request. A single request carrying every
# vector can become very large; batching keeps each request bounded.
UPSERT_BATCH_SIZE = 256

# --- Setup device ---
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# --- Load model ---
model = SentenceTransformer("MossaabDev/Quran_embed_V2.2", device=device)
# NOTE(review): asizeof walks the whole Python object graph of the model;
# presumably this is only a diagnostic and may be slow for large models.
print("Model size:", asizeof.asizeof(model))

# --- Load ayahs from ayas.csv ---
df = pd.read_csv("app/data/ayas.csv", encoding="utf-8")

# Ensure expected columns
if not {'answers', 'arabic'}.issubset(df.columns):
    raise ValueError("❌ 'ayas.csv' must contain 'answers' and 'arabic' columns.")

# Remove rows with missing values, then rows whose 'answers' text repeats.
df = df.dropna(subset=['answers', 'arabic']).drop_duplicates(subset=['answers'])
ayat = df['answers'].tolist()
print(f"Total unique ayat loaded: {len(ayat)}")
print("✅ Model and data ready.")

# --- Check if collection exists ---
# NOTE(review): existence is checked by name only. If an earlier run created
# the collection but failed before finishing the upload, later runs will skip
# the upload and leave the collection incomplete.
collections = [c.name for c in client.get_collections().collections]
if COLLECTION_NAME not in collections:
    # Fail with a clear message instead of an IndexError on embeddings[0].
    if not ayat:
        raise ValueError("❌ 'ayas.csv' contains no usable rows to index.")

    print("Creating Qdrant collection and uploading embeddings...")
    embeddings = model.encode(
        ayat, convert_to_tensor=False, show_progress_bar=True
    ).tolist()

    # The collection is known to be absent here, so a plain create is enough;
    # recreate_collection (delete + create) is deprecated in qdrant-client.
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=models.VectorParams(
            size=len(embeddings[0]),
            distance=models.Distance.COSINE,
        ),
    )

    # 'arabic' is taken from the same filtered frame as `ayat`, so the two
    # lists are aligned row for row; pulling the column once avoids a pandas
    # row lookup per point.
    arabic_texts = df['arabic'].tolist()
    points = [
        models.PointStruct(
            id=idx,
            vector=emb,
            payload={
                "text": ayah,
                "arabic": arabic,
            },
        )
        for idx, (emb, ayah, arabic) in enumerate(
            zip(embeddings, ayat, arabic_texts)
        )
    ]

    # Upload in bounded batches rather than one request with every point.
    for start in range(0, len(points), UPSERT_BATCH_SIZE):
        client.upsert(
            collection_name=COLLECTION_NAME,
            points=points[start:start + UPSERT_BATCH_SIZE],
        )
    print("✅ Embeddings uploaded to Qdrant.")
else:
    print("✅ Collection already exists, skipping upload.")
|