Spaces:
Runtime error
Runtime error
| from app.data_loader import model | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from app.qdrant_client import client | |
| import pandas as pd | |
| from pympler import asizeof | |
| import re | |
# Verse table, read once at import time. The two lists below are taken from
# its 'arabic' and 'answers' columns respectively.
ayat_arabic = pd.read_csv("app/data/ayas.csv", encoding="utf-8")
# NOTE(review): ayat_english_list comes from the 'answers' column (not an
# 'english' column) -- presumably that column holds the English text; confirm.
ayat_arabic_list, ayat_english_list = (
    ayat_arabic[column].tolist() for column in ("arabic", "answers")
)
def normalize(s: str):
    """Return *s* reduced to lowercase ASCII letters, digits and spaces.

    Every character other than ``a-z``, ``A-Z``, ``0-9`` and the space is
    dropped, the remainder is lowercased, and leading/trailing spaces are
    trimmed.
    """
    ascii_only = re.sub(r'[^a-zA-Z0-9 ]', '', s)
    lowered = ascii_only.lower()
    return lowered.strip()
def find_top_5_ayahs_qdrant(question: str):
    """Return up to five stored ayahs closest to *question*.

    The question is encoded with ``model`` and searched against the
    ``ayahs_collection`` collection through ``client`` (limit 5). For each
    hit, the payload's ``text`` value is compared -- after ``normalize`` is
    applied to both sides -- with the ``answers`` column of ``ayat_arabic``;
    the first matching row supplies the Arabic text and the link.

    Args:
        question: The user's question; it is also echoed back in every
            result entry.

    Returns:
        A list with one dict per hit, holding the keys ``question``,
        ``answer`` (the hit's payload text), ``arabic`` and ``link``. When no
        row matches, ``arabic`` is ``"❌ Not found"`` and ``link`` is
        ``"https://quran.com"``.
    """
    q_emb = model.encode(question).tolist()
    search_result = client.search(
        collection_name="ayahs_collection",
        query_vector=q_emb,
        limit=5,
    )

    # Normalising the 'answers' column does not depend on the hit, so do it
    # once here instead of copying and re-normalising the table per hit.
    normalized_answers = ayat_arabic['answers'].apply(normalize)

    results = []
    for point in search_result:
        english_ayah = point.payload['text']
        # Debug output: strip() must be *called*; formatting the bare
        # attribute would print the bound-method repr instead of the text.
        print(f"{english_ayah.strip()}")

        # Look up the Arabic row whose normalised 'answers' text equals the
        # normalised payload text; the first match wins.
        match = ayat_arabic[normalized_answers == normalize(english_ayah)]
        if match.empty:
            arabic_ayah = "❌ Not found"
            link = "https://quran.com"
        else:
            first_row = match.iloc[0]
            arabic_ayah = first_row['arabic']
            link = first_row['link']

        results.append({
            "question": question,
            "answer": english_ayah,
            "arabic": arabic_ayah,
            "link": link,
        })

    # Debug output: deep size of the assembled result list.
    print("results size:", asizeof.asizeof(results))
    return results