Update app.py
Browse files
app.py
CHANGED
|
@@ -130,36 +130,49 @@ def lookup_dictionary(arabic_word):
|
|
| 130 |
# Encode the Arabic word for URL
|
| 131 |
encoded_word = urllib.parse.quote(clean_word)
|
| 132 |
|
| 133 |
-
# Construct the search URL for Arabic Lexicon
|
| 134 |
-
search_url = f"https://arabiclexicon.hawramani.com/
|
| 135 |
|
| 136 |
# Set headers to mimic a browser request
|
| 137 |
headers = {
|
| 138 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
}
|
| 140 |
|
| 141 |
# Make the request
|
| 142 |
-
response = requests.get(search_url, headers=headers, timeout=
|
| 143 |
response.raise_for_status()
|
| 144 |
|
| 145 |
# Parse the HTML response
|
| 146 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 147 |
|
| 148 |
-
# Extract search results
|
| 149 |
results = []
|
| 150 |
|
| 151 |
-
# Look for
|
| 152 |
-
|
| 153 |
|
| 154 |
-
if
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
-
for
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
if results:
|
| 165 |
formatted_results = f"π **Dictionary Results for '{arabic_word}':**\n\n"
|
|
@@ -176,6 +189,71 @@ def lookup_dictionary(arabic_word):
|
|
| 176 |
except Exception as e:
|
| 177 |
return f"β Dictionary lookup failed: {str(e)}\n\nπ **Try manual search:** https://arabiclexicon.hawramani.com"
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
def lookup_multiple_words(arabic_text):
|
| 180 |
"""
|
| 181 |
Lookup multiple Arabic words separated by spaces
|
|
|
|
| 130 |
# Encode the Arabic word for URL
|
| 131 |
encoded_word = urllib.parse.quote(clean_word)
|
| 132 |
|
| 133 |
+
# Construct the search URL for Arabic Lexicon with cat=9 (appears to be a specific category)
|
| 134 |
+
search_url = f"https://arabiclexicon.hawramani.com/search/{encoded_word}?cat=9"
|
| 135 |
|
| 136 |
# Set headers to mimic a browser request
|
| 137 |
headers = {
|
| 138 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
| 139 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
| 140 |
+
'Accept-Language': 'en-US,en;q=0.5',
|
| 141 |
+
'Accept-Encoding': 'gzip, deflate',
|
| 142 |
+
'Connection': 'keep-alive'
|
| 143 |
}
|
| 144 |
|
| 145 |
# Make the request
|
| 146 |
+
response = requests.get(search_url, headers=headers, timeout=15)
|
| 147 |
response.raise_for_status()
|
| 148 |
|
| 149 |
# Parse the HTML response
|
| 150 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 151 |
|
| 152 |
+
# Extract search results - based on the structure I can see
|
| 153 |
results = []
|
| 154 |
|
| 155 |
+
# Look for the main content spans that contain the definitions
|
| 156 |
+
content_spans = soup.find_all('span', {'index': True})
|
| 157 |
|
| 158 |
+
if content_spans:
|
| 159 |
+
for span in content_spans[:2]: # Limit to first 2 spans
|
| 160 |
+
text_content = span.get_text(strip=True)
|
| 161 |
+
if text_content and len(text_content) > 20:
|
| 162 |
+
# Clean up the text and format it nicely
|
| 163 |
+
clean_text = text_content.replace('\n', ' ').replace(' ', ' ')
|
| 164 |
+
results.append(clean_text[:500] + "..." if len(clean_text) > 500 else clean_text)
|
| 165 |
|
| 166 |
+
# Fallback: look for any Arabic text content if spans don't work
|
| 167 |
+
if not results:
|
| 168 |
+
# Look for divs or other containers with Arabic text
|
| 169 |
+
arabic_text_elements = soup.find_all(text=re.compile(r'[\u0600-\u06FF]{3,}'))
|
| 170 |
+
|
| 171 |
+
for element in arabic_text_elements[:3]:
|
| 172 |
+
parent_text = element.parent.get_text(strip=True) if element.parent else str(element)
|
| 173 |
+
if len(parent_text) > 30 and clean_word in parent_text:
|
| 174 |
+
clean_text = parent_text.replace('\n', ' ').replace(' ', ' ')
|
| 175 |
+
results.append(clean_text[:400] + "..." if len(clean_text) > 400 else clean_text)
|
| 176 |
|
| 177 |
if results:
|
| 178 |
formatted_results = f"π **Dictionary Results for '{arabic_word}':**\n\n"
|
|
|
|
| 189 |
except Exception as e:
|
| 190 |
return f"β Dictionary lookup failed: {str(e)}\n\nπ **Try manual search:** https://arabiclexicon.hawramani.com"
|
| 191 |
|
| 192 |
+
|
| 193 |
+
def lookup_dictionary_alternative(arabic_word):
    """
    Look up an Arabic word using the query-string search URL as a fallback.

    Fetches ``https://arabiclexicon.hawramani.com/?search=<word>&cat=9``,
    parses the returned HTML and collects up to three text snippets that
    contain the cleaned word.

    Args:
        arabic_word: The word to look up. ``None``, empty and
            whitespace-only values are rejected without a network request.

    Returns:
        A Markdown-formatted string: the numbered snippets plus a link to
        the full results, a "no results" message with a manual-search link,
        or a failure message if anything raised during the lookup.
    """
    if not arabic_word or not arabic_word.strip():
        return "No word provided for lookup."

    try:
        clean_word = clean_arabic_text(arabic_word)
        encoded_word = urllib.parse.quote(clean_word)

        # Try the original search format as fallback
        search_url = f"https://arabiclexicon.hawramani.com/?search={encoded_word}&cat=9"

        # Set a browser-like User-Agent on the request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # NOTE(review): combining tag names with ``string=`` only matches tags
        # whose ``.string`` is set (a single text child), so rows or divs with
        # several children are presumably never returned -- confirm against
        # the live page markup before relying on this selector.
        result_elements = soup.find_all(['tr', 'div', 'p'], string=re.compile(r'[\u0600-\u06FF]+'))

        results = []
        for element in result_elements[:3]:
            text_content = element.get_text(strip=True)
            if text_content and len(text_content) > 20 and clean_word in text_content:
                # Collapse every run of whitespace (spaces, tabs, newlines)
                # into a single space before measuring/truncating.
                clean_text = " ".join(text_content.split())
                results.append(clean_text[:400] + "..." if len(clean_text) > 400 else clean_text)

        if not results:
            return f"π No results found with alternative search for '{arabic_word}'.\n\nπ **Try manual search:** [Search on Arabic Lexicon]({search_url})"

        numbered = "".join(
            f"**{i}.** {result}\n\n" for i, result in enumerate(results, 1)
        )
        return (
            f"π **Dictionary Results for '{arabic_word}' (Alternative Search):**\n\n"
            + numbered
            + f"\nπ **Full results:** [View on Arabic Lexicon]({search_url})"
        )

    except Exception as e:
        # Boundary handler: callers expect a displayable string, not a raise.
        return f"β Alternative dictionary lookup failed: {str(e)}"
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def lookup_dictionary_with_fallback(arabic_word):
    """
    Look up a word with the primary search, falling back to the alternative.

    Calls ``lookup_dictionary`` first. Only when its message contains
    "No dictionary results found" is ``lookup_dictionary_alternative`` tried.
    The alternative's message is returned only if it is an actual result:
    both its "No results found" message and its
    "Alternative dictionary lookup failed" message are treated as unusable,
    in which case the primary message is returned instead.

    Args:
        arabic_word: The word to look up; passed unchanged to both lookups.

    Returns:
        The message string produced by whichever lookup is chosen.
    """
    # Try the new format first
    result = lookup_dictionary(arabic_word)

    # Anything other than an explicit "no results" is returned as-is
    # (presumably including the primary's own error messages -- that text
    # is not visible from here).
    if "No dictionary results found" not in result:
        return result

    alternative_result = lookup_dictionary_alternative(arabic_word)

    # The alternative reports problems via its return string rather than by
    # raising, so an exception there must not replace the primary message.
    fallback_unusable = (
        "No results found" in alternative_result
        or "Alternative dictionary lookup failed" in alternative_result
    )
    return result if fallback_unusable else alternative_result
|
| 256 |
+
|
| 257 |
def lookup_multiple_words(arabic_text):
|
| 258 |
"""
|
| 259 |
Lookup multiple Arabic words separated by spaces
|