Spaces:
Runtime error
Runtime error
Update app/rag.py
Browse files- app/rag.py +7 -2
app/rag.py
CHANGED
|
@@ -8,7 +8,8 @@ from pympler import asizeof
|
|
| 8 |
ayat_arabic = pd.read_csv("app/data/ayas.csv", encoding="utf-8")
|
| 9 |
ayat_arabic_list = ayat_arabic['arabic'].tolist()
|
| 10 |
ayat_english_list = ayat_arabic['answers'].tolist() # the English text is read from the 'answers' column
|
| 11 |
-
|
|
|
|
| 12 |
def find_top_5_ayahs_qdrant(question: str):
|
| 13 |
q_emb = model.encode(question).tolist()
|
| 14 |
search_result = client.search(
|
|
@@ -24,7 +25,11 @@ def find_top_5_ayahs_qdrant(question: str):
|
|
| 24 |
|
| 25 |
# Try to find the Arabic equivalent by matching the English verse
|
| 26 |
print(f"{english_ayah.strip}")
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
arabic_ayah = match.iloc[0]['arabic'] if not match.empty else "❌ Not found"
|
| 29 |
|
| 30 |
results.append({
|
|
|
|
| 8 |
ayat_arabic = pd.read_csv("app/data/ayas.csv", encoding="utf-8")
|
| 9 |
ayat_arabic_list = ayat_arabic['arabic'].tolist()
|
| 10 |
ayat_english_list = ayat_arabic['answers'].tolist() # the English text is read from the 'answers' column
|
| 11 |
+
def normalize(s: str) -> str:
    """Return a canonical form of *s* for exact-match comparison.

    Every character other than ASCII letters, digits and spaces is removed,
    the result is lower-cased, and leading/trailing whitespace is stripped.

    Non-string input (for example the float NaN that pandas produces for an
    empty CSV cell) yields an empty string instead of raising ``TypeError``
    from ``re.sub``, so the function can be applied to a whole column.
    """
    if not isinstance(s, str):
        return ""
    return re.sub(r'[^a-zA-Z0-9 ]', '', s).lower().strip()
|
| 13 |
def find_top_5_ayahs_qdrant(question: str):
|
| 14 |
q_emb = model.encode(question).tolist()
|
| 15 |
search_result = client.search(
|
|
|
|
| 25 |
|
| 26 |
# Try to find the Arabic equivalent by matching the English verse
|
| 27 |
print(f"{english_ayah.strip}")
|
| 28 |
+
normalized_english_ayah = normalize(english_ayah)
|
| 29 |
+
normalized_df = ayat_arabic.copy()
|
| 30 |
+
normalized_df['normalized'] = ayat_arabic['answers'].apply(normalize)
|
| 31 |
+
|
| 32 |
+
match = normalized_df[normalized_df['normalized'] == normalized_english_ayah]
|
| 33 |
arabic_ayah = match.iloc[0]['arabic'] if not match.empty else "❌ Not found"
|
| 34 |
|
| 35 |
results.append({
|