kambris committed on
Commit
ddaedae
·
verified ·
1 Parent(s): 9396938

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -15
app.py CHANGED
@@ -130,36 +130,49 @@ def lookup_dictionary(arabic_word):
130
  # Encode the Arabic word for URL
131
  encoded_word = urllib.parse.quote(clean_word)
132
 
133
- # Construct the search URL for Arabic Lexicon
134
- search_url = f"https://arabiclexicon.hawramani.com/?search={encoded_word}&type=0"
135
 
136
  # Set headers to mimic a browser request
137
  headers = {
138
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 
 
 
 
139
  }
140
 
141
  # Make the request
142
- response = requests.get(search_url, headers=headers, timeout=10)
143
  response.raise_for_status()
144
 
145
  # Parse the HTML response
146
  soup = BeautifulSoup(response.content, 'html.parser')
147
 
148
- # Extract search results (this may need adjustment based on the site's structure)
149
  results = []
150
 
151
- # Look for search result containers (adjust selectors based on actual site structure)
152
- result_containers = soup.find_all(['div', 'tr', 'li'], class_=re.compile(r'result|entry|definition', re.I))
153
 
154
- if not result_containers:
155
- # Fallback: look for any elements containing Arabic text
156
- result_containers = soup.find_all(text=re.compile(r'[\u0600-\u06FF]+'))
157
- result_containers = [elem.parent for elem in result_containers if elem.parent][:5]
 
 
 
158
 
159
- for container in result_containers[:3]: # Limit to first 3 results
160
- text_content = container.get_text(strip=True)
161
- if text_content and len(text_content) > 10 and clean_word in text_content:
162
- results.append(text_content[:200] + "..." if len(text_content) > 200 else text_content)
 
 
 
 
 
 
163
 
164
  if results:
165
  formatted_results = f"πŸ“– **Dictionary Results for '{arabic_word}':**\n\n"
@@ -176,6 +189,71 @@ def lookup_dictionary(arabic_word):
176
  except Exception as e:
177
  return f"❌ Dictionary lookup failed: {str(e)}\n\nπŸ”— **Try manual search:** https://arabiclexicon.hawramani.com"
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  def lookup_multiple_words(arabic_text):
180
  """
181
  Lookup multiple Arabic words separated by spaces
 
130
  # Encode the Arabic word for URL
131
  encoded_word = urllib.parse.quote(clean_word)
132
 
133
+ # Construct the search URL for Arabic Lexicon with cat=9 (appears to be a specific category)
134
+ search_url = f"https://arabiclexicon.hawramani.com/search/{encoded_word}?cat=9"
135
 
136
  # Set headers to mimic a browser request
137
  headers = {
138
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
139
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
140
+ 'Accept-Language': 'en-US,en;q=0.5',
141
+ 'Accept-Encoding': 'gzip, deflate',
142
+ 'Connection': 'keep-alive'
143
  }
144
 
145
  # Make the request
146
+ response = requests.get(search_url, headers=headers, timeout=15)
147
  response.raise_for_status()
148
 
149
  # Parse the HTML response
150
  soup = BeautifulSoup(response.content, 'html.parser')
151
 
152
+ # Extract search results - based on the structure I can see
153
  results = []
154
 
155
+ # Look for the main content spans that contain the definitions
156
+ content_spans = soup.find_all('span', {'index': True})
157
 
158
+ if content_spans:
159
+ for span in content_spans[:2]: # Limit to first 2 spans
160
+ text_content = span.get_text(strip=True)
161
+ if text_content and len(text_content) > 20:
162
+ # Clean up the text and format it nicely
163
+ clean_text = text_content.replace('\n', ' ').replace(' ', ' ')
164
+ results.append(clean_text[:500] + "..." if len(clean_text) > 500 else clean_text)
165
 
166
+ # Fallback: look for any Arabic text content if spans don't work
167
+ if not results:
168
+ # Look for divs or other containers with Arabic text
169
+ arabic_text_elements = soup.find_all(text=re.compile(r'[\u0600-\u06FF]{3,}'))
170
+
171
+ for element in arabic_text_elements[:3]:
172
+ parent_text = element.parent.get_text(strip=True) if element.parent else str(element)
173
+ if len(parent_text) > 30 and clean_word in parent_text:
174
+ clean_text = parent_text.replace('\n', ' ').replace(' ', ' ')
175
+ results.append(clean_text[:400] + "..." if len(clean_text) > 400 else clean_text)
176
 
177
  if results:
178
  formatted_results = f"πŸ“– **Dictionary Results for '{arabic_word}':**\n\n"
 
189
  except Exception as e:
190
  return f"❌ Dictionary lookup failed: {str(e)}\n\nπŸ”— **Try manual search:** https://arabiclexicon.hawramani.com"
191
 
192
+
193
def lookup_dictionary_alternative(arabic_word):
    """
    Alternative lookup method using the old search format as fallback.

    Requests ``https://arabiclexicon.hawramani.com/?search=<word>&cat=9`` and
    collects up to three text snippets from the returned HTML that contain
    the cleaned word.

    Args:
        arabic_word: The Arabic word to look up.

    Returns:
        A Markdown-formatted string. It is one of: the numbered snippets
        followed by a link to the full results, a "No results found" message
        with a manual-search link, "No word provided for lookup." when there
        is nothing to search for, or a failure message. Any exception raised
        during the lookup itself is caught and reported in the returned
        string.
    """
    if not arabic_word or not arabic_word.strip():
        return "No word provided for lookup."

    try:
        clean_word = clean_arabic_text(arabic_word)
        # An empty cleaned word would make the `clean_word in text` filter
        # below match every snippet, so treat it as "nothing to look up".
        if not clean_word:
            return "No word provided for lookup."

        encoded_word = urllib.parse.quote(clean_word)

        # Try the original search format as fallback
        search_url = f"https://arabiclexicon.hawramani.com/?search={encoded_word}&cat=9"

        # Set headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Rows, divs or paragraphs whose own string contains Arabic characters.
        # NOTE(review): a `string=` filter only matches tags whose `.string`
        # is set, so containers with mixed children are presumably skipped —
        # confirm against the site's markup.
        result_elements = soup.find_all(['tr', 'div', 'p'], string=re.compile(r'[\u0600-\u06FF]+'))

        results = []
        for element in result_elements[:3]:  # Only inspect the first 3 candidates
            text_content = element.get_text(strip=True)
            if text_content and len(text_content) > 20 and clean_word in text_content:
                # Collapse every run of whitespace (including newlines) to one space.
                clean_text = " ".join(text_content.split())
                results.append(clean_text[:400] + "..." if len(clean_text) > 400 else clean_text)

        if not results:
            return f"📖 No results found with alternative search for '{arabic_word}'.\n\n🔗 **Try manual search:** [Search on Arabic Lexicon]({search_url})"

        numbered_results = "".join(
            f"**{i}.** {result}\n\n" for i, result in enumerate(results, 1)
        )
        return (
            f"📖 **Dictionary Results for '{arabic_word}' (Alternative Search):**\n\n"
            f"{numbered_results}"
            f"\n🔗 **Full results:** [View on Arabic Lexicon]({search_url})"
        )

    except Exception as e:
        # Top-level boundary: report the failure to the user instead of raising.
        return f"❌ Alternative dictionary lookup failed: {str(e)}"
240
+
241
+
242
def lookup_dictionary_with_fallback(arabic_word):
    """
    Main lookup function that tries the new format first, then falls back to the old format.

    Args:
        arabic_word: The Arabic word to look up.

    Returns:
        The string returned by ``lookup_dictionary`` unless it reports
        "No dictionary results found" and ``lookup_dictionary_alternative``
        produces actual results, in which case the alternative string is
        returned.
    """
    # Try the new format first
    result = lookup_dictionary(arabic_word)

    # Anything other than a "no results" outcome is returned unchanged.
    if "No dictionary results found" not in result:
        return result

    # If no results found, try the alternative format
    alternative_result = lookup_dictionary_alternative(arabic_word)

    # The alternative only wins when it actually found something. Its own
    # "no results", "no word" and failure messages must not replace the
    # primary message.
    unsuccessful_markers = (
        "No results found",
        "Alternative dictionary lookup failed",
        "No word provided",
    )
    if any(marker in alternative_result for marker in unsuccessful_markers):
        return result

    return alternative_result
256
+
257
  def lookup_multiple_words(arabic_text):
258
  """
259
  Lookup multiple Arabic words separated by spaces