Batnini committed on
Commit
852d7f8
·
verified ·
1 Parent(s): e12f2fb

Update tools/quran_search.py

Browse files
Files changed (1) hide show
  1. tools/quran_search.py +19 -83
tools/quran_search.py CHANGED
@@ -1,89 +1,25 @@
1
- import logging
2
  import requests
3
- from sentence_transformers import SentenceTransformer
4
- from sklearn.metrics.pairwise import cosine_similarity
5
- import numpy as np
6
 
7
class QuranSearchEngine:
    """Search verses through the Quran.com search API and re-rank them.

    Candidate verses are fetched from the remote search endpoint, then the
    query and each verse text are embedded with a sentence-transformer model
    and ordered by cosine similarity to the query.
    """

    def __init__(self):
        """Set the API endpoint, load the embedding model and configure logging."""
        self.api_url = "https://api.quran.com/api/v3/search"
        self.model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2', device='cpu')
        # Query string -> query embedding. NOTE(review): this cache is never
        # evicted, so it grows with every distinct query for the lifetime of
        # the instance.
        self.embedding_cache = {}
        self.min_query_length = 2

        # Configure logging
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    def _fetch_verses(self, query, limit=5):
        """Fetch candidate verses from the search API.

        Args:
            query: Search text sent as the ``q`` parameter.
            limit: Number of results requested via the ``size`` parameter.

        Returns:
            The ``results`` list of the JSON response, or an empty list when
            the request fails, returns an HTTP error status, or the body is
            not valid JSON.
        """
        try:
            response = requests.get(
                self.api_url,
                # Let requests build the query string so spaces, '&', '#' and
                # non-ASCII text are percent-encoded instead of being pasted
                # raw into the URL.
                params={'q': query, 'size': limit},
                timeout=15,
                headers={'Accept': 'application/json'},
            )
            response.raise_for_status()
            return response.json().get('results', [])
        except requests.exceptions.RequestException as e:
            logging.error("API request failed: %s", e)
            return []
        except ValueError as e:
            logging.error("Invalid API response: %s", e)
            return []

    def _process_verse(self, verse, similarity):
        """Convert one API verse record into the result format.

        Args:
            verse: Mapping for a single verse as returned by the API.
            similarity: Numeric similarity score for this verse.

        Returns:
            A dict with ``surah``, ``ayah``, ``text``, ``similarity`` (string
            with two decimals), ``surah_num`` and ``ayah_num``. Missing
            fields fall back to ``''`` or ``0``.
        """
        return {
            'surah': verse.get('surah_name', ''),
            'ayah': verse.get('verse_id', 0),
            'text': verse.get('text', ''),
            'similarity': f"{similarity:.2f}",
            'surah_num': verse.get('surah_id', 0),
            'ayah_num': verse.get('verse_id', 0)
        }

    def search(self, query, top_k=5):
        """Return verses matching ``query``, most similar first.

        Args:
            query: Search text; converted to ``str`` and stripped.
            top_k: Number of candidates requested from the API.

        Returns:
            A list of result dicts (see ``_process_verse``) in descending
            similarity order. Returns an empty list when the query is shorter
            than ``min_query_length``, when the API yields nothing, or when
            any processing step raises.
        """
        # Validate input
        query = str(query).strip()
        if len(query) < self.min_query_length:
            return []

        try:
            # 1. Get initial results from API
            verses = self._fetch_verses(query, top_k)
            if not verses:
                return []

            # 2. Prepare texts for embedding. A verse without 'text' is
            # embedded as an empty string (matching _process_verse) rather
            # than discarding the whole result set.
            texts = [v.get('text', '') for v in verses]

            # 3. Get or create the query embedding
            if query in self.embedding_cache:
                query_embedding = self.embedding_cache[query]
            else:
                query_embedding = self.model.encode([query])[0]
                self.embedding_cache[query] = query_embedding

            verse_embeddings = self.model.encode(texts)

            # 4. Calculate similarities
            similarities = cosine_similarity(
                [query_embedding],
                verse_embeddings
            )[0]

            # 5. Rank on the raw score, then format. Sorting on the
            # two-decimal string would lose precision in the ordering.
            ranked = sorted(
                zip(verses, similarities),
                key=lambda pair: float(pair[1]),
                reverse=True,
            )
            return [self._process_verse(verse, score) for verse, score in ranked]

        except Exception as e:
            # Best-effort boundary: callers get an empty list, never an error.
            logging.error("Search processing failed: %s", e)
            return []
 
 
1
  import requests
 
 
 
2
 
3
class QuranFetcher:
    """Minimal client for the Quran.com API: chapter list and chapter text."""

    BASE_URL = "https://api.quran.com/api/v3"

    def get_all_surahs(self):
        """Return the list of surahs from the ``/chapters`` endpoint.

        Returns:
            The ``chapters`` list of the JSON response, or an empty list when
            the request fails, returns an HTTP error status, or the body is
            not a JSON object.
        """
        try:
            response = requests.get(f"{self.BASE_URL}/chapters", timeout=5)
            # Treat 4xx/5xx as failures instead of parsing an error body.
            response.raise_for_status()
            return response.json().get('chapters', [])
        except (requests.exceptions.RequestException, ValueError, AttributeError):
            # Network/HTTP error, invalid JSON, or a non-object payload.
            # Deliberately narrower than a bare except so KeyboardInterrupt
            # and SystemExit still propagate.
            return []

    def get_surah_text(self, surah_id):
        """Return the Uthmani text of one surah, one verse per line.

        Args:
            surah_id: Chapter identifier interpolated into the request path.

        Returns:
            The ``text_uthmani`` of each returned verse joined with newlines,
            or the message "⚠️ تعذر تحميل السورة" when the request fails,
            returns an HTTP error status, or the payload is malformed.
        """
        try:
            response = requests.get(
                f"{self.BASE_URL}/chapters/{surah_id}/verses",
                params={"language": "ar"},
                timeout=10
            )
            # Without this, an HTTP error carrying a JSON body would yield an
            # empty string instead of the failure message.
            response.raise_for_status()
            # NOTE(review): only a single response is read; if this endpoint
            # paginates, longer surahs would be truncated - confirm against
            # the API documentation.
            verses = response.json().get('verses', [])
            return "\n".join(v['text_uthmani'] for v in verses)
        except (
            requests.exceptions.RequestException,
            ValueError,
            AttributeError,
            KeyError,
            TypeError,
        ):
            # Network/HTTP error, invalid JSON, or verses lacking the
            # expected 'text_uthmani' string field.
            return "⚠️ تعذر تحميل السورة"