MossaabDev committed on
Commit
7030047
·
verified ·
1 Parent(s): 27b3fe2

Update app/rag.py

Browse files
Files changed (1) hide show
  1. app/rag.py +7 -2
app/rag.py CHANGED
@@ -8,7 +8,8 @@ from pympler import asizeof
8
  ayat_arabic = pd.read_csv("app/data/ayas.csv", encoding="utf-8")
9
  ayat_arabic_list = ayat_arabic['arabic'].tolist()
10
  ayat_english_list = ayat_arabic['answers'].tolist() # the 'answers' column is used as the English verse text
11
-
 
12
  def find_top_5_ayahs_qdrant(question: str):
13
  q_emb = model.encode(question).tolist()
14
  search_result = client.search(
@@ -24,7 +25,11 @@ def find_top_5_ayahs_qdrant(question: str):
24
 
25
  # Try to find the Arabic equivalent by matching the English verse
26
  print(f"{english_ayah.strip}")
27
- match = ayat_arabic[ayat_arabic['answers'].str.strip() == english_ayah.strip()]
 
 
 
 
28
  arabic_ayah = match.iloc[0]['arabic'] if not match.empty else "❌ Not found"
29
 
30
  results.append({
 
8
  ayat_arabic = pd.read_csv("app/data/ayas.csv", encoding="utf-8")
9
  ayat_arabic_list = ayat_arabic['arabic'].tolist()
10
  ayat_english_list = ayat_arabic['answers'].tolist() # the 'answers' column is used as the English verse text
11
+ def normalize(s: str):
12
+ return re.sub(r'[^a-zA-Z0-9 ]', '', s).lower().strip()
13
  def find_top_5_ayahs_qdrant(question: str):
14
  q_emb = model.encode(question).tolist()
15
  search_result = client.search(
 
25
 
26
  # Try to find the Arabic equivalent by matching the English verse
27
  print(f"{english_ayah.strip}")
28
+ normalized_english_ayah = normalize(english_ayah)
29
+ normalized_df = ayat_arabic.copy()
30
+ normalized_df['normalized'] = ayat_arabic['answers'].apply(normalize)
31
+
32
+ match = normalized_df[normalized_df['normalized'] == normalized_english_ayah]
33
  arabic_ayah = match.iloc[0]['arabic'] if not match.empty else "❌ Not found"
34
 
35
  results.append({