Spaces:

BenjaminKaindu0506
/

My_campus_agent

Runtime error

App Files Files Community

BenjaminKaindu0506 committed on Dec 31, 2025

Commit

5b6f01f

1 Parent(s): 6152309

Improve DuckDuckGo search parsing and error handling, update error messages

Browse files

Files changed (2) hide show

app.py +5 -3
search.py +36 -6

app.py CHANGED Viewed

@@ -61,11 +61,13 @@ def process_search_query(query: str, max_results: int = 8, model: Optional[str]
         if not search_results:
             return None, """No search results found. This could be because:
-1. SearXNG is not accessible (check if it's running or try a different instance)
-2. No UA pages matched your query
 **You can still use the app:**
-- Try the "🌐 Website URL" tab to analyze a specific UA webpage directly"""
         st.info(f"Found {len(search_results)} search results. Fetching pages...")

         if not search_results:
             return None, """No search results found. This could be because:
+1. DuckDuckGo search didn't find matching UA pages
+2. SearXNG fallback is not accessible
+3. No UA pages matched your query
 **You can still use the app:**
+- Try the "🌐 Website URL" tab to analyze a specific UA webpage directly
+- Try rephrasing your query with different keywords"""
         st.info(f"Found {len(search_results)} search results. Fetching pages...")

search.py CHANGED Viewed

@@ -437,7 +437,8 @@ def duckduckgo_primary_search(query: str, max_results: int = 10) -> List[Dict[st
                     url = link_elem.get('href', '')
                     # Clean up URL (remove DuckDuckGo redirect)
-                    if '/l/?kh=' in url or '/l/?uddg=' in url:
                         # Extract actual URL from DuckDuckGo redirect
                         match = re.search(r'uddg=([^&]+)', url)
                         if match:
@@ -449,6 +450,12 @@ def duckduckgo_primary_search(query: str, max_results: int = 10) -> List[Dict[st
                             if match:
                                 from urllib.parse import unquote
                                 url = unquote(match.group(1))
                     # Additional URL cleaning
                     if url.startswith('//'):
@@ -456,7 +463,17 @@ def duckduckgo_primary_search(query: str, max_results: int = 10) -> List[Dict[st
                     elif url.startswith('/'):
                         url = 'https://duckduckgo.com' + url
-                    if not url or not is_ua_domain(url):
                         continue
                     if url in seen_urls:
                         continue
@@ -506,16 +523,29 @@ def duckduckgo_primary_search(query: str, max_results: int = 10) -> List[Dict[st
                 print(f"✅ DuckDuckGo found {len(results)} real-time results for UA domains")
                 return results
             else:
-                print("⚠️ DuckDuckGo returned no UA domain results, trying Google...")
                 # Fallback to Google
-                return google_fallback_search(query, max_results)
     except httpx.TimeoutException:
         print("⚠️ DuckDuckGo request timed out, trying Google...")
-        return google_fallback_search(query, max_results)
     except Exception as e:
         print(f"⚠️ DuckDuckGo search error: {e}, trying Google...")
-        return google_fallback_search(query, max_results)
 def duckduckgo_fallback_search(query: str, max_results: int = 10) -> List[Dict[str, str]]:

                     url = link_elem.get('href', '')
                     # Clean up URL (remove DuckDuckGo redirect)
+                    original_url = url
+                    if '/l/?kh=' in url or '/l/?uddg=' in url or '/l/?uddg=' in url:
                         # Extract actual URL from DuckDuckGo redirect
                         match = re.search(r'uddg=([^&]+)', url)
                         if match:
                             if match:
                                 from urllib.parse import unquote
                                 url = unquote(match.group(1))
+                            else:
+                                # Try to extract from /l/?kh= format
+                                match = re.search(r'/l/\?kh=[^&]*&uddg=([^&]+)', url)
+                                if match:
+                                    from urllib.parse import unquote
+                                    url = unquote(match.group(1))
                     # Additional URL cleaning
                     if url.startswith('//'):
                     elif url.startswith('/'):
                         url = 'https://duckduckgo.com' + url
+                    # Check if URL is a UA domain
+                    if not url:
+                        continue
+                    # More lenient check - allow partial matches during parsing
+                    url_lower = url.lower()
+                    if 'arizona.edu' not in url_lower:
+                        continue
+                    # Now do strict domain check
+                    if not is_ua_domain(url):
                         continue
                     if url in seen_urls:
                         continue
                 print(f"✅ DuckDuckGo found {len(results)} real-time results for UA domains")
                 return results
             else:
+                print(f"⚠️ DuckDuckGo returned no UA domain results (found {len(result_divs)} total results)")
+                print("Trying Google as fallback...")
                 # Fallback to Google
+                google_results = google_fallback_search(query, max_results)
+                if google_results:
+                    return google_results
+                print("⚠️ All search methods failed to find UA domain results")
+                return []
     except httpx.TimeoutException:
         print("⚠️ DuckDuckGo request timed out, trying Google...")
+        google_results = google_fallback_search(query, max_results)
+        if google_results:
+            return google_results
+        print("⚠️ Google fallback also failed")
+        return []
     except Exception as e:
         print(f"⚠️ DuckDuckGo search error: {e}, trying Google...")
+        google_results = google_fallback_search(query, max_results)
+        if google_results:
+            return google_results
+        print(f"⚠️ Google fallback also failed: {e}")
+        return []
 def duckduckgo_fallback_search(query: str, max_results: int = 10) -> List[Dict[str, str]]: