Batnini committed on
Commit
07a1614
·
verified ·
1 Parent(s): 48effdd

Update tools/quran_search.py

Browse files
Files changed (1) hide show
  1. tools/quran_search.py +13 -18
tools/quran_search.py CHANGED
@@ -3,7 +3,6 @@ from sentence_transformers import SentenceTransformer
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import numpy as np
5
  import requests
6
- from io import StringIO
7
  import logging
8
 
9
  class QuranSearchEngine:
@@ -13,26 +12,19 @@ class QuranSearchEngine:
13
  self.verse_embeddings = None
14
  self.quran_df = None
15
  self.surah_names = {
16
- 1: "ุงู„ูุงุชุญุฉ", 2: "ุงู„ุจู‚ุฑุฉ", 3: "ุขู„ ุนู…ุฑุงู†",
17
- # Add all 114 surah names here
18
  114: "ุงู„ู†ุงุณ"
19
  }
20
 
21
  def load_data(self):
22
  if not self.data_loaded:
23
  try:
24
- # Dynamic data loading
25
- url = "https://api.quran.com/api/v4/chapters"
26
- response = requests.get(url)
27
- chapters = response.json()['chapters']
28
- self.surah_names = {c['id']: c['name_arabic'] for c in chapters}
29
-
30
- # Load verses
31
  verses_url = "https://api.quran.com/api/v4/verses/by_chapter/2?language=ar&words=true"
32
- verses_response = requests.get(verses_url)
33
- verses = verses_response.json()['verses']
34
 
35
- # Process verses into DataFrame
36
  verses_data = []
37
  for verse in verses:
38
  verses_data.append({
@@ -45,19 +37,21 @@ class QuranSearchEngine:
45
  self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
46
  self.verse_embeddings = self.model.encode(self.quran_df['text'].tolist())
47
  self.data_loaded = True
48
-
49
  except Exception as e:
50
- logging.error(f"Error loading data: {str(e)}")
51
  self.load_backup_data()
52
 
53
  def load_backup_data(self):
54
- """Fallback to static data if API fails"""
55
  backup = [
56
  {"surah": 2, "ayah": 163, "text": "ูˆุฅู„ู‡ูƒู… ุฅู„ู‡ ูˆุงุญุฏ ู„ุง ุฅู„ู‡ ุฅู„ุง ู‡ูˆ ุงู„ุฑุญู…ู† ุงู„ุฑุญูŠู…"},
57
  {"surah": 3, "ayah": 134, "text": "ุงู„ุฐูŠู† ูŠู†ูู‚ูˆู† ููŠ ุงู„ุณุฑุงุก ูˆุงู„ุถุฑุงุก ูˆุงู„ูƒุงุธู…ูŠู† ุงู„ุบูŠุธ ูˆุงู„ุนุงููŠู† ุนู† ุงู„ู†ุงุณ ูˆุงู„ู„ู‡ ูŠุญุจ ุงู„ู…ุญุณู†ูŠู†"},
58
  {"surah": 4, "ayah": 135, "text": "ูŠุง ุฃูŠู‡ุง ุงู„ุฐูŠู† ุขู…ู†ูˆุง ูƒูˆู†ูˆุง ู‚ูˆุงู…ูŠู† ุจุงู„ู‚ุณุท ุดู‡ุฏุงุก ู„ู„ู‡ ูˆู„ูˆ ุนู„ู‰ ุฃู†ูุณูƒู… ุฃูˆ ุงู„ูˆุงู„ุฏูŠู† ูˆุงู„ุฃู‚ุฑุจูŠู†"}
59
  ]
60
  self.quran_df = pd.DataFrame(backup)
 
 
61
  self.verse_embeddings = self.model.encode(self.quran_df['text'].tolist())
62
  self.data_loaded = True
63
 
@@ -72,7 +66,7 @@ class QuranSearchEngine:
72
  for idx in top_indices:
73
  verse = self.quran_df.iloc[idx]
74
  results.append({
75
- "surah": self.surah_names.get(verse['surah'],
76
  "ayah": verse['ayah'],
77
  "text": verse['text'],
78
  "similarity": f"{similarities[idx]:.2f}",
@@ -80,6 +74,7 @@ class QuranSearchEngine:
80
  "ayah_num": verse['ayah']
81
  })
82
  return results
 
83
  except Exception as e:
84
- logging.error(f"Search error: {str(e)}")
85
  return []
 
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import numpy as np
5
  import requests
 
6
  import logging
7
 
8
  class QuranSearchEngine:
 
12
  self.verse_embeddings = None
13
  self.quran_df = None
14
  self.surah_names = {
15
+ 1: "ุงู„ูุงุชุญุฉ", 2: "ุงู„ุจู‚ุฑุฉ", 3: "ุขู„ ุนู…ุฑุงู†",
16
+ 4: "ุงู„ู†ุณุงุก", 5: "ุงู„ู…ุงุฆุฏุฉ", 6: "ุงู„ุฃู†ุนุงู…",
17
  114: "ุงู„ู†ุงุณ"
18
  }
19
 
20
  def load_data(self):
21
  if not self.data_loaded:
22
  try:
23
+ # Try to load from API
 
 
 
 
 
 
24
  verses_url = "https://api.quran.com/api/v4/verses/by_chapter/2?language=ar&words=true"
25
+ response = requests.get(verses_url, timeout=10)
26
+ verses = response.json().get('verses', [])
27
 
 
28
  verses_data = []
29
  for verse in verses:
30
  verses_data.append({
 
37
  self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
38
  self.verse_embeddings = self.model.encode(self.quran_df['text'].tolist())
39
  self.data_loaded = True
40
+
41
  except Exception as e:
42
+ logging.error(f"API Error: {str(e)}")
43
  self.load_backup_data()
44
 
45
  def load_backup_data(self):
46
+ """Load backup data if API fails"""
47
  backup = [
48
  {"surah": 2, "ayah": 163, "text": "ูˆุฅู„ู‡ูƒู… ุฅู„ู‡ ูˆุงุญุฏ ู„ุง ุฅู„ู‡ ุฅู„ุง ู‡ูˆ ุงู„ุฑุญู…ู† ุงู„ุฑุญูŠู…"},
49
  {"surah": 3, "ayah": 134, "text": "ุงู„ุฐูŠู† ูŠู†ูู‚ูˆู† ููŠ ุงู„ุณุฑุงุก ูˆุงู„ุถุฑุงุก ูˆุงู„ูƒุงุธู…ูŠู† ุงู„ุบูŠุธ ูˆุงู„ุนุงููŠู† ุนู† ุงู„ู†ุงุณ ูˆุงู„ู„ู‡ ูŠุญุจ ุงู„ู…ุญุณู†ูŠู†"},
50
  {"surah": 4, "ayah": 135, "text": "ูŠุง ุฃูŠู‡ุง ุงู„ุฐูŠู† ุขู…ู†ูˆุง ูƒูˆู†ูˆุง ู‚ูˆุงู…ูŠู† ุจุงู„ู‚ุณุท ุดู‡ุฏุงุก ู„ู„ู‡ ูˆู„ูˆ ุนู„ู‰ ุฃู†ูุณูƒู… ุฃูˆ ุงู„ูˆุงู„ุฏูŠู† ูˆุงู„ุฃู‚ุฑุจูŠู†"}
51
  ]
52
  self.quran_df = pd.DataFrame(backup)
53
+ if not hasattr(self, 'model'):
54
+ self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
55
  self.verse_embeddings = self.model.encode(self.quran_df['text'].tolist())
56
  self.data_loaded = True
57
 
 
66
  for idx in top_indices:
67
  verse = self.quran_df.iloc[idx]
68
  results.append({
69
+ "surah": self.surah_names.get(verse['surah'], "ุณูˆุฑุฉ ุบูŠุฑ ู…ุนุฑูˆูุฉ"),
70
  "ayah": verse['ayah'],
71
  "text": verse['text'],
72
  "similarity": f"{similarities[idx]:.2f}",
 
74
  "ayah_num": verse['ayah']
75
  })
76
  return results
77
+
78
  except Exception as e:
79
+ logging.error(f"Search Error: {str(e)}")
80
  return []