MossaabDev committed on
Commit
2b4ec83
·
verified ·
1 Parent(s): 8a9f576

Update app/rag.py

Browse files
Files changed (1) hide show
  1. app/rag.py +36 -38
app/rag.py CHANGED
@@ -1,38 +1,36 @@
1
- from app.data_loader import model
2
- from sklearn.metrics.pairwise import cosine_similarity
3
- from app.qdrant_client import client
4
-
5
- from pympler import asizeof
6
- """
7
- def find_top_5_ayahs(question: str):
8
- q_emb = model.encode(question)
9
- sims = cosine_similarity([q_emb], embeddings)[0]
10
- top_indices = sims.argsort()[-5:][::-1]
11
-
12
- results = []
13
- for i in top_indices:
14
- ayah_text = ayat[i] # single column with the ayah text
15
- results.append({
16
- "question": question, # the question from the request
17
- "answer": ayah_text # the ayah text
18
- })
19
- return results
20
- """
21
- # find top 5 ayahs from Qdrant (alternative method)
22
- def find_top_5_ayahs_qdrant(question: str):
23
- q_emb = model.encode(question).tolist()
24
- search_result = client.search(
25
- collection_name="ayahs_collection",
26
- query_vector=q_emb,
27
- limit=5
28
- )
29
-
30
- results = []
31
- for point in search_result:
32
- ayah_text = point.payload['text']
33
- results.append({
34
- "question": question,
35
- "answer": ayah_text
36
- })
37
- print("results size : ",asizeof.asizeof(results))
38
- return results
 
1
+ from app.data_loader import model
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ from app.qdrant_client import client
4
+ import pandas as pd
5
+ from pympler import asizeof
6
+
7
+ # Load the ayah table once at import time (Arabic text with its English translation)
8
+ ayat_arabic = pd.read_csv("app/data/ayas.csv", encoding="utf-8")
9
+ ayat_arabic_list = ayat_arabic['arabic'].tolist()
10
+ ayat_english_list = ayat_arabic['english'].tolist() # assumes you have an 'english' column
11
+
# Lazily built map: stripped English verse text -> Arabic verse text.
# Stays None until the first search hit needs it.
_ARABIC_BY_ENGLISH = None


def _arabic_lookup():
    """Return the cached English -> Arabic map, building it on first use.

    Keys are the stripped values of ``ayat_arabic['english']``; values are
    the corresponding ``ayat_arabic['arabic']`` cells.
    """
    global _ARABIC_BY_ENGLISH
    if _ARABIC_BY_ENGLISH is None:
        lookup = {}
        for english, arabic in zip(ayat_arabic['english'], ayat_arabic['arabic']):
            # Non-string cells (e.g. NaN for an empty CSV field) can never
            # equal a verse string, so they are left out of the index.
            if isinstance(english, str):
                # setdefault keeps the first row when the same English text
                # appears more than once, matching the old `iloc[0]` choice.
                lookup.setdefault(english.strip(), arabic)
        _ARABIC_BY_ENGLISH = lookup
    return _ARABIC_BY_ENGLISH


def find_top_5_ayahs_qdrant(question: str):
    """Search Qdrant for ayahs matching *question* and attach the Arabic text.

    The question is encoded with ``model.encode`` and sent to
    ``client.search`` against the ``"ayahs_collection"`` collection with
    ``limit=5``. Each hit's ``payload['text']`` is treated as the English
    verse and is matched (after stripping whitespace) against the
    ``english`` column of ``ayat_arabic`` to find the Arabic text.

    Args:
        question: The user's question; echoed back in every result.

    Returns:
        A list with one dict per hit, holding the keys ``"question"``,
        ``"answer"`` (English verse from the payload) and ``"arabic"``
        (the matching Arabic verse, or ``"❌ Not found"`` when no row of
        the table has that English text).
    """
    q_emb = model.encode(question).tolist()
    search_result = client.search(
        collection_name="ayahs_collection",
        query_vector=q_emb,
        limit=5,
    )

    results = []
    for point in search_result:
        english_ayah = point.payload['text']

        # One dict lookup per hit instead of re-stripping and scanning the
        # whole DataFrame column for every result.
        arabic_ayah = _arabic_lookup().get(english_ayah.strip(), "❌ Not found")

        results.append({
            "question": question,
            "answer": english_ayah,
            "arabic": arabic_ayah,
        })

    # Debug output: deep size of the response as measured by pympler.
    print("results size:", asizeof.asizeof(results))
    return results