Replaced the DuckDuckGo Instant Answer search backend with a Wikipedia API search.
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
# (Keep Constants as is)
|
|
@@ -15,7 +16,7 @@ class BasicAgent:
|
|
| 15 |
|
| 16 |
def search_internet(self, query: str) -> str:
|
| 17 |
"""
|
| 18 |
-
Search the internet for information using
|
| 19 |
This is a simple implementation that returns search results as text.
|
| 20 |
|
| 21 |
Args:
|
|
@@ -26,32 +27,56 @@ class BasicAgent:
|
|
| 26 |
"""
|
| 27 |
print(f"Searching internet for: {query}")
|
| 28 |
try:
|
| 29 |
-
#
|
| 30 |
headers = {
|
| 31 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 32 |
}
|
| 33 |
-
search_url = f"https://api.duckduckgo.com/?q={query}&format=json&no_html=1&no_redirect=1&skip_disambig=1"
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
#
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
# Extract
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
results.append(f"- {topic['Text']}")
|
| 50 |
|
| 51 |
-
if
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
else:
|
| 54 |
-
return "
|
| 55 |
|
| 56 |
except Exception as e:
|
| 57 |
print(f"Error searching internet: {e}")
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
+
import inspect
|
| 5 |
import pandas as pd
|
| 6 |
|
| 7 |
# (Keep Constants as is)
|
|
|
|
| 16 |
|
| 17 |
def search_internet(self, query: str) -> str:
|
| 18 |
"""
|
| 19 |
+
Search the internet for information using Wikipedia's API.
|
| 20 |
This is a simple implementation that returns search results as text.
|
| 21 |
|
| 22 |
Args:
|
|
|
|
| 27 |
"""
|
| 28 |
print(f"Searching internet for: {query}")
|
| 29 |
try:
|
| 30 |
+
# Use Wikipedia API to search for information
|
| 31 |
headers = {
|
| 32 |
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 33 |
}
|
|
|
|
| 34 |
|
| 35 |
+
# Step 1: Search for relevant articles
|
| 36 |
+
search_url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={query}&format=json"
|
| 37 |
+
search_response = requests.get(search_url, headers=headers, timeout=10)
|
| 38 |
+
search_response.raise_for_status()
|
| 39 |
+
search_data = search_response.json()
|
| 40 |
|
| 41 |
+
# Check if we found any search results
|
| 42 |
+
if 'query' not in search_data or 'search' not in search_data['query'] or not search_data['query']['search']:
|
| 43 |
+
return "No relevant information found."
|
| 44 |
+
|
| 45 |
+
# Get the title of the first (most relevant) result
|
| 46 |
+
first_result = search_data['query']['search'][0]
|
| 47 |
+
page_title = first_result['title']
|
| 48 |
|
| 49 |
+
# Step 2: Fetch the content of the most relevant article
|
| 50 |
+
content_url = f"https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exintro=1&explaintext=1&titles={page_title}&format=json"
|
| 51 |
+
content_response = requests.get(content_url, headers=headers, timeout=10)
|
| 52 |
+
content_response.raise_for_status()
|
| 53 |
+
content_data = content_response.json()
|
| 54 |
|
| 55 |
+
# Extract the page content
|
| 56 |
+
pages = content_data['query']['pages']
|
| 57 |
+
page_id = list(pages.keys())[0]
|
|
|
|
| 58 |
|
| 59 |
+
if 'extract' in pages[page_id]:
|
| 60 |
+
extract = pages[page_id]['extract']
|
| 61 |
+
# Limit extract length to avoid very long responses
|
| 62 |
+
if len(extract) > 1000:
|
| 63 |
+
extract = extract[:1000] + "..."
|
| 64 |
+
|
| 65 |
+
result = f"Wikipedia article: {page_title}\n\n{extract}"
|
| 66 |
+
|
| 67 |
+
# Also get a few more related article titles
|
| 68 |
+
related_titles = []
|
| 69 |
+
for item in search_data['query']['search'][1:4]: # Get next 3 results
|
| 70 |
+
related_titles.append(item['title'])
|
| 71 |
+
|
| 72 |
+
if related_titles:
|
| 73 |
+
result += "\n\nRelated topics:\n"
|
| 74 |
+
for title in related_titles:
|
| 75 |
+
result += f"- {title}\n"
|
| 76 |
+
|
| 77 |
+
return result
|
| 78 |
else:
|
| 79 |
+
return "Found a relevant page, but couldn't extract its content."
|
| 80 |
|
| 81 |
except Exception as e:
|
| 82 |
print(f"Error searching internet: {e}")
|