Update tools/quran_search.py
Browse files - tools/quran_search.py +10 -3
tools/quran_search.py
CHANGED
|
@@ -92,7 +92,7 @@ class QuranSearchEngine:
|
|
| 92 |
|
| 93 |
try:
|
| 94 |
self.model = SentenceTransformer(MODEL_NAME)
|
| 95 |
-
verse_texts = [v['text'] for v in self.all_verses]
|
| 96 |
self.verse_embeddings = []
|
| 97 |
for i in range(0, len(verse_texts), CHUNK_SIZE):
|
| 98 |
chunk = verse_texts[i:i + CHUNK_SIZE]
|
|
@@ -144,7 +144,10 @@ class QuranSearchEngine:
|
|
| 144 |
return self._keyword_fallback_search(query, top_k)
|
| 145 |
|
| 146 |
try:
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
| 148 |
similarities = cosine_similarity(query_embedding, self.verse_embeddings)[0]
|
| 149 |
top_indices = np.argsort(similarities)[-top_k:][::-1]
|
| 150 |
|
|
@@ -152,10 +155,12 @@ class QuranSearchEngine:
|
|
| 152 |
for idx in top_indices:
|
| 153 |
verse = self.all_verses[idx]
|
| 154 |
surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
|
|
|
|
|
|
|
| 155 |
results.append(
|
| 156 |
f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
|
| 157 |
)
|
| 158 |
-
return "\n\n".join(results)
|
| 159 |
except Exception as e:
|
| 160 |
self.logger.error(f"Search failed: {e}")
|
| 161 |
return "حدث خطأ أثناء البحث. جرب مرة أخرى."
|
|
@@ -166,6 +171,8 @@ class QuranSearchEngine:
|
|
| 166 |
for verse in self.all_verses:
|
| 167 |
if query_lower in verse['text'].lower():
|
| 168 |
surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
|
|
|
|
|
|
|
| 169 |
matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
|
| 170 |
return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
|
| 171 |
|
|
|
|
| 92 |
|
| 93 |
try:
|
| 94 |
self.model = SentenceTransformer(MODEL_NAME)
|
| 95 |
+
verse_texts = ["passage: " + v['text'] for v in self.all_verses] # Add prefix for e5 model
|
| 96 |
self.verse_embeddings = []
|
| 97 |
for i in range(0, len(verse_texts), CHUNK_SIZE):
|
| 98 |
chunk = verse_texts[i:i + CHUNK_SIZE]
|
|
|
|
| 144 |
return self._keyword_fallback_search(query, top_k)
|
| 145 |
|
| 146 |
try:
|
| 147 |
+
# Add context for single-word queries (at most one whitespace-separated token)
|
| 148 |
+
if len(query.split()) <= 1:
|
| 149 |
+
query = f"معنى كلمة {query}" # Add context: "meaning of the word"
|
| 150 |
+
query_embedding = self.model.encode(["query: " + query], convert_to_tensor=False)
|
| 151 |
similarities = cosine_similarity(query_embedding, self.verse_embeddings)[0]
|
| 152 |
top_indices = np.argsort(similarities)[-top_k:][::-1]
|
| 153 |
|
|
|
|
| 155 |
for idx in top_indices:
|
| 156 |
verse = self.all_verses[idx]
|
| 157 |
surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
|
| 158 |
+
if surah_name.startswith("سورة "):
|
| 159 |
+
surah_name = surah_name[len("سورة "):]
|
| 160 |
results.append(
|
| 161 |
f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}"
|
| 162 |
)
|
| 163 |
+
return "\n\n".join(results) or "لا توجد نتائج ذات صلة."
|
| 164 |
except Exception as e:
|
| 165 |
self.logger.error(f"Search failed: {e}")
|
| 166 |
return "حدث خطأ أثناء البحث. جرب مرة أخرى."
|
|
|
|
| 171 |
for verse in self.all_verses:
|
| 172 |
if query_lower in verse['text'].lower():
|
| 173 |
surah_name = next((s.get('surahNameArabicLong', s.get('name', '')) for s in self.surahs if s['id'] == verse['surah_id']), f"سورة {verse['surah_id']}")
|
| 174 |
+
if surah_name.startswith("سورة "):
|
| 175 |
+
surah_name = surah_name[len("سورة "):]
|
| 176 |
matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
|
| 177 |
return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
|
| 178 |
|