Spaces:

kambris
/

Varabic

Sleeping

App Files Files Community

kambris committed on Jun 20, 2025

Commit

ddaedae

verified ·

1 Parent(s): 9396938

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -15

app.py CHANGED Viewed

@@ -130,36 +130,49 @@ def lookup_dictionary(arabic_word):
         # Encode the Arabic word for URL
         encoded_word = urllib.parse.quote(clean_word)
-        # Construct the search URL for Arabic Lexicon
-        search_url = f"https://arabiclexicon.hawramani.com/?search={encoded_word}&type=0"
         # Set headers to mimic a browser request
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
         # Make the request
-        response = requests.get(search_url, headers=headers, timeout=10)
         response.raise_for_status()
         # Parse the HTML response
         soup = BeautifulSoup(response.content, 'html.parser')
-        # Extract search results (this may need adjustment based on the site's structure)
         results = []
-        # Look for search result containers (adjust selectors based on actual site structure)
-        result_containers = soup.find_all(['div', 'tr', 'li'], class_=re.compile(r'result|entry|definition', re.I))
-        if not result_containers:
-            # Fallback: look for any elements containing Arabic text
-            result_containers = soup.find_all(text=re.compile(r'[\u0600-\u06FF]+'))
-            result_containers = [elem.parent for elem in result_containers if elem.parent][:5]
-        for container in result_containers[:3]:  # Limit to first 3 results
-            text_content = container.get_text(strip=True)
-            if text_content and len(text_content) > 10 and clean_word in text_content:
-                results.append(text_content[:200] + "..." if len(text_content) > 200 else text_content)
         if results:
             formatted_results = f"📖 **Dictionary Results for '{arabic_word}':**\n\n"
@@ -176,6 +189,71 @@ def lookup_dictionary(arabic_word):
     except Exception as e:
         return f"❌ Dictionary lookup failed: {str(e)}\n\n🔗 **Try manual search:** https://arabiclexicon.hawramani.com"
 def lookup_multiple_words(arabic_text):
     """
     Lookup multiple Arabic words separated by spaces

         # Encode the Arabic word for URL
         encoded_word = urllib.parse.quote(clean_word)
+        # Construct the search URL for Arabic Lexicon with cat=9 (appears to be a specific category)
+        search_url = f"https://arabiclexicon.hawramani.com/search/{encoded_word}?cat=9"
         # Set headers to mimic a browser request
         headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Accept-Encoding': 'gzip, deflate',
+            'Connection': 'keep-alive'
         }
         # Make the request
+        response = requests.get(search_url, headers=headers, timeout=15)
         response.raise_for_status()
         # Parse the HTML response
         soup = BeautifulSoup(response.content, 'html.parser')
+        # Extract search results - based on the structure I can see
         results = []
+        # Look for the main content spans that contain the definitions
+        content_spans = soup.find_all('span', {'index': True})
+        if content_spans:
+            for span in content_spans[:2]:  # Limit to first 2 spans
+                text_content = span.get_text(strip=True)
+                if text_content and len(text_content) > 20:
+                    # Clean up the text and format it nicely
+                    clean_text = text_content.replace('\n', ' ').replace('  ', ' ')
+                    results.append(clean_text[:500] + "..." if len(clean_text) > 500 else clean_text)
+        # Fallback: look for any Arabic text content if spans don't work
+        if not results:
+            # Look for divs or other containers with Arabic text
+            arabic_text_elements = soup.find_all(text=re.compile(r'[\u0600-\u06FF]{3,}'))
+            for element in arabic_text_elements[:3]:
+                parent_text = element.parent.get_text(strip=True) if element.parent else str(element)
+                if len(parent_text) > 30 and clean_word in parent_text:
+                    clean_text = parent_text.replace('\n', ' ').replace('  ', ' ')
+                    results.append(clean_text[:400] + "..." if len(clean_text) > 400 else clean_text)
         if results:
             formatted_results = f"📖 **Dictionary Results for '{arabic_word}':**\n\n"
     except Exception as e:
         return f"❌ Dictionary lookup failed: {str(e)}\n\n🔗 **Try manual search:** https://arabiclexicon.hawramani.com"
+def lookup_dictionary_alternative(arabic_word):
+    """
+    Look up an Arabic word on Arabic Lexicon via the query-string search URL.
+
+    Fallback for the path-style search URL: requests ``/?search=<word>&cat=9``
+    and collects up to three text snippets that contain the cleaned word.
+
+    Args:
+        arabic_word: The word to look up. It is normalized with
+            ``clean_arabic_text`` and URL-encoded before the request.
+
+    Returns:
+        A Markdown-formatted string: numbered results followed by a link to
+        the full results page, a "no results" notice with a manual-search
+        link, or an error message if the request or parsing raised.
+    """
+    if not arabic_word or not arabic_word.strip():
+        return "No word provided for lookup."
+    max_results = 3
+    max_snippet_length = 400
+    try:
+        clean_word = clean_arabic_text(arabic_word)
+        encoded_word = urllib.parse.quote(clean_word)
+        # Query-string search format, used as a fallback for the path-style URL
+        search_url = f"https://arabiclexicon.hawramani.com/?search={encoded_word}&cat=9"
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        response = requests.get(search_url, headers=headers, timeout=15)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.content, 'html.parser')
+        # NOTE(review): combining tag names with string= only matches tags whose
+        # .string is a single text node, so rows/divs with nested markup are
+        # skipped - confirm this is the intended selection.
+        result_elements = soup.find_all(['tr', 'div', 'p'], string=re.compile(r'[\u0600-\u06FF]+'))
+        results = []
+        # Filter first, then cap: the limit applies to matching snippets rather
+        # than to the first few candidate elements, which may be unrelated text.
+        for element in result_elements:
+            text_content = element.get_text(strip=True)
+            if len(text_content) <= 20 or clean_word not in text_content:
+                continue
+            # Collapse every run of whitespace (newlines included) to one space
+            clean_text = ' '.join(text_content.split())
+            if len(clean_text) > max_snippet_length:
+                clean_text = clean_text[:max_snippet_length] + "..."
+            results.append(clean_text)
+            if len(results) == max_results:
+                break
+        if not results:
+            return f"📖 No results found with alternative search for '{arabic_word}'.\n\n🔗 **Try manual search:** [Search on Arabic Lexicon]({search_url})"
+        numbered = "".join(f"**{i}.** {result}\n\n" for i, result in enumerate(results, 1))
+        return (
+            f"📖 **Dictionary Results for '{arabic_word}' (Alternative Search):**\n\n"
+            + numbered
+            + f"\n🔗 **Full results:** [View on Arabic Lexicon]({search_url})"
+        )
+    except Exception as e:
+        # Best-effort lookup: report the failure as text instead of raising
+        return f"❌ Alternative dictionary lookup failed: {e}"
+def lookup_dictionary_with_fallback(arabic_word):
+    """
+    Look up a word with the new URL format, falling back to the old format.
+
+    Args:
+        arabic_word: The word to look up; passed unchanged to both lookups.
+
+    Returns:
+        The alternative lookup's text when the primary lookup reported no
+        results and the alternative actually produced results; otherwise the
+        primary lookup's text.
+    """
+    result = lookup_dictionary(arabic_word)
+    # Only a "no results" outcome from the primary lookup triggers the fallback
+    if "No dictionary results found" not in result:
+        return result
+    alternative_result = lookup_dictionary_alternative(arabic_word)
+    # The alternative reports failures as a message starting with the error
+    # marker; do not let that replace the primary "no results" message.
+    alternative_unusable = (
+        "No results found" in alternative_result
+        or alternative_result.startswith("❌")
+    )
+    if alternative_unusable:
+        return result
+    return alternative_result
 def lookup_multiple_words(arabic_text):
     """
     Lookup multiple Arabic words separated by spaces