File size: 1,594 Bytes
2b4ec83
 
 
 
 
64ce75f
2b4ec83
 
 
 
e0a4c9b
7030047
 
2b4ec83
 
 
 
 
 
 
 
 
 
 
 
 
 
04ce96d
7030047
 
 
 
 
2b4ec83
9823c9b
2b4ec83
 
 
 
9823c9b
 
2b4ec83
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from app.data_loader import model
from sklearn.metrics.pairwise import cosine_similarity
from app.qdrant_client import client
import pandas as pd
from pympler import asizeof
import re

# Load the ayah table from CSV once, at import time. This module reads the
# 'arabic', 'answers' and 'link' columns from it.
ayat_arabic = pd.read_csv("app/data/ayas.csv", encoding="utf-8")
ayat_arabic_list = ayat_arabic['arabic'].tolist()
ayat_english_list = ayat_arabic['answers'].tolist()  # NOTE(review): presumably 'answers' holds the English text — confirm against the CSV
def normalize(s: str):
    """Return a canonical form of *s* for loose text comparison.

    Every character that is not an ASCII letter, an ASCII digit or a
    space is removed; the remainder is lower-cased and stripped of
    leading and trailing whitespace.
    """
    kept = re.sub(r'[^a-zA-Z0-9 ]', '', s)
    lowered = kept.lower()
    return lowered.strip()
def find_top_5_ayahs_qdrant(question: str):
    """Return the top five ayahs for *question* from the Qdrant collection.

    The question is embedded with ``model`` and searched against the
    ``"ayahs_collection"`` collection. For each hit, the English text stored
    in ``payload['text']`` is matched (after ``normalize``) against the
    ``'answers'`` column of ``ayat_arabic`` to look up the corresponding
    ``'arabic'`` text and ``'link'``.

    Args:
        question: The question text to embed and search for.

    Returns:
        A list of dicts, one per search hit in the order the search returned
        them, each with the keys ``"question"``, ``"answer"``, ``"arabic"``
        and ``"link"``. When no row of ``ayat_arabic`` matches a hit, the
        ``"arabic"`` value is ``"❌ Not found"`` and the ``"link"`` value is
        ``"https://quran.com"``.
    """
    q_emb = model.encode(question).tolist()
    search_result = client.search(
        collection_name="ayahs_collection",
        query_vector=q_emb,
        limit=5,
    )

    # Normalizing the whole 'answers' column does not depend on the hit, so
    # do it once per call instead of once per search result.
    normalized_answers = ayat_arabic['answers'].apply(normalize)

    results = []

    for point in search_result:
        english_ayah = point.payload['text']

        # Debug output of the retrieved verse; strip() must be *called*,
        # otherwise the bound-method repr is printed instead of the text.
        print(english_ayah.strip())

        # Try to find the Arabic equivalent by matching the English verse.
        match = ayat_arabic[normalized_answers == normalize(english_ayah)]
        if match.empty:
            arabic_ayah = "❌ Not found"
            link = "https://quran.com"
        else:
            # Several rows may normalize to the same text; use the first.
            first_row = match.iloc[0]
            arabic_ayah = first_row['arabic']
            link = first_row['link']

        results.append({
            "question": question,
            "answer": english_ayah,
            "arabic": arabic_ayah,
            "link": link,
        })

    print("results size:", asizeof.asizeof(results))
    return results