Batnini commited on
Commit
c36492f
·
verified ·
1 Parent(s): 79c1095

Update tools/quran_search.py

Browse files
Files changed (1) hide show
  1. tools/quran_search.py +15 -132
tools/quran_search.py CHANGED
@@ -1,3 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import requests
2
  import logging
3
  import numpy as np
@@ -26,135 +40,4 @@ class QuranSearchEngine:
26
  print(f"Initialization error: {e}", file=sys.stderr)
27
  self._load_fallback_data() # Ensure minimal startup
28
 
29
- def _load_full_quran(self):
30
- max_retries = 5 # Increased retries
31
- for attempt in range(max_retries):
32
- try:
33
- response = requests.get(f"{self.api_url}surah.json", timeout=15) # Increased timeout
34
- response.raise_for_status()
35
- self.surahs = response.json()
36
- for i, s in enumerate(self.surahs):
37
- s['id'] = i + 1
38
- break
39
- except Exception as e:
40
- self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch surahs: {e}")
41
- if attempt == max_retries - 1:
42
- self._load_fallback_data()
43
- time.sleep(2 ** attempt)
44
-
45
- if self.surahs:
46
- for surah in self.surahs:
47
- surah_id = surah['id']
48
- try:
49
- response = requests.get(f"{self.api_url}{surah_id}.json", timeout=15)
50
- response.raise_for_status()
51
- data = response.json()
52
- verses = data['arabic1']
53
- for verse_num, text in enumerate(verses, start=1):
54
- self.all_verses.append({
55
- 'surah_id': surah_id,
56
- 'verse_num': verse_num,
57
- 'text': text
58
- })
59
- except Exception as e:
60
- self.logger.error(f"Failed to fetch verses for surah {surah_id}: {e}")
61
-
62
- def _load_fallback_data(self):
63
- self.logger.warning("Falling back to minimal data due to API failure")
64
- self.surahs = self._load_fallback_surahs()
65
- self.all_verses = [
66
- {'surah_id': 1, 'verse_num': 1, 'text': "بِسْمِ ٱللَّهِ ٱلرَّحْمَـٰنِ ٱلرَّحِيمِ"},
67
- {'surah_id': 1, 'verse_num': 2, 'text': "ٱلْحَمْدُ لِلَّهِ رَبِّ ٱلْعَٰلَمِينَ"}
68
- ]
69
-
70
- def _load_all_verses_and_embeddings(self):
71
- if not self.all_verses:
72
- return
73
-
74
- try:
75
- print("Attempting to load model...", file=sys.stderr) # Debug
76
- self.model = SentenceTransformer(MODEL_NAME)
77
- print("Model loaded successfully", file=sys.stderr) # Debug
78
- verse_texts = [v['text'] for v in self.all_verses]
79
- self.verse_embeddings = []
80
- for i in range(0, len(verse_texts), CHUNK_SIZE):
81
- chunk = verse_texts[i:i + CHUNK_SIZE]
82
- embeddings = self.model.encode(chunk, convert_to_tensor=False)
83
- self.verse_embeddings.append(embeddings)
84
- self.verse_embeddings = np.vstack(self.verse_embeddings)
85
- print("Embeddings computed successfully", file=sys.stderr) # Debug
86
- except Exception as e:
87
- self.logger.error(f"Failed to compute embeddings: {e}", exc_info=True)
88
- self.verse_embeddings = None
89
- self.logger.warning("Falling back to keyword-based search due to embedding failure")
90
-
91
- def get_surahs(self):
92
- if self.surahs:
93
- return [(s['surahNameArabicLong'], s['id']) for s in self.surahs]
94
- return self._load_fallback_surahs()
95
-
96
- def get_surah_text(self, surah_id):
97
- max_retries = 3
98
- for attempt in range(max_retries):
99
- try:
100
- response = requests.get(f"{self.api_url}{surah_id}.json", timeout=15)
101
- response.raise_for_status()
102
- data = response.json()
103
- verses = data['arabic1']
104
- return "\n\n".join(f"آية {i + 1}: {v}" for i, v in enumerate(verses))
105
- except Exception as e:
106
- self.logger.error(f"Attempt {attempt + 1}/{max_retries} failed to fetch surah {surah_id}: {e}")
107
- if attempt == max_retries - 1:
108
- return self._load_fallback_verse()
109
- time.sleep(2 ** attempt)
110
-
111
- def search_verses(self, query, top_k=5):
112
- if self.model is None:
113
- try:
114
- print("Loading model on demand...", file=sys.stderr)
115
- self.model = SentenceTransformer(MODEL_NAME)
116
- print("Model loaded successfully", file=sys.stderr)
117
- verse_texts = [v['text'] for v in self.all_verses]
118
- self.verse_embeddings = []
119
- for i in range(0, len(verse_texts), CHUNK_SIZE):
120
- chunk = verse_texts[i:i + CHUNK_SIZE]
121
- embeddings = self.model.encode(chunk, convert_to_tensor=False)
122
- self.verse_embeddings.append(embeddings)
123
- self.verse_embeddings = np.vstack(self.verse_embeddings)
124
- except Exception as e:
125
- self.logger.error(f"Failed to load model on demand: {e}", exc_info=True)
126
- self.verse_embeddings = None
127
-
128
- if self.verse_embeddings is None or not self.all_verses:
129
- return self._keyword_fallback_search(query, top_k)
130
-
131
- try:
132
- print(f"Encoding query: {query}", file=sys.stderr) # Debug
133
- query_embedding = self.model.encode([query], convert_to_tensor=False)
134
- similarities = cosine_similarity(query_embedding, self.verse_embeddings)[0]
135
- top_indices = np.argsort(similarities)[-top_k:][::-1]
136
-
137
- results = []
138
- for idx in top_indices:
139
- verse = self.all_verses[idx]
140
- surah_name = self.surahs[verse['surah_id'] - 1]['surahNameArabicLong']
141
- results.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
142
- return "\n\n".join(results)
143
- except Exception as e:
144
- self.logger.error(f"Search failed: {e}", exc_info=True)
145
- return "حدث خطأ أثناء البحث. جرب مرة أخرى."
146
-
147
- def _keyword_fallback_search(self, query, top_k=5):
148
- query_lower = query.lower()
149
- matches = []
150
- for verse in self.all_verses:
151
- if query_lower in verse['text'].lower():
152
- surah_name = self.surahs[verse['surah_id'] - 1]['surahNameArabicLong'] if self.surahs else f"سورة {verse['surah_id']}"
153
- matches.append(f"سورة {surah_name} - آية {verse['verse_num']}:\n{verse['text']}")
154
- return "\n\n".join(matches[:top_k]) or "لا توجد نتائج مطابقة."
155
-
156
- def _load_fallback_surahs(self):
157
- return [("الفاتحة", 1), ("البقرة", 2), ("آل عمران", 3)]
158
-
159
- def _load_fallback_verse(self):
160
- return "بسم الله الرحمن الرحيم\nالله لا إله إلا هو الحي القيوم"
 
1
# tools/quran_search.py

# --- Patch for huggingface_hub cached_download removal ---
# Newer huggingface_hub releases removed `cached_download`; older callers
# (e.g. legacy sentence-transformers) still import it, so alias the modern
# `hf_hub_download` under the legacy name when it is missing.
# NOTE(review): the two functions take different arguments (url vs
# repo_id/filename) — confirm downstream callers pass compatible args.
import huggingface_hub

if not hasattr(huggingface_hub, "cached_download"):
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        raise ImportError(
            "huggingface_hub version is incompatible and cannot be patched automatically."
        )
    else:
        huggingface_hub.cached_download = hf_hub_download
# ---------------------------------------------------------
14
+
15
  import requests
16
  import logging
17
  import numpy as np
 
40
  print(f"Initialization error: {e}", file=sys.stderr)
41
  self._load_fallback_data() # Ensure minimal startup
42
 
43
+ # ... rest of your class code unchanged ...