Spaces:
Sleeping
Sleeping
import os

import requests

# Wikipedia needs the local proxy to be accessible from China.
_PROXY = os.getenv("LOCAL_PROXY", "http://127.0.0.1:7890")

# Shared module-level session: route all traffic through the proxy and
# ignore any proxy configuration inherited from the environment, so the
# explicit setting above always wins.
_wiki_session = requests.Session()
_wiki_session.trust_env = False
_wiki_session.proxies = {"http": _PROXY, "https": _PROXY}
_wiki_session.headers["User-Agent"] = (
    "Mozilla/5.0 (compatible; ResearchAgent/1.0; educational use)"
)
def wikipedia_search(query: str) -> str:
    """Search Wikipedia for encyclopedic information.

    Runs a two-step query against the MediaWiki API: a full-text search to
    resolve *query* to an article title, then a plain-text extract of the
    top hit.

    Args:
        query: Free-text search string.

    Returns:
        A formatted string with the article title, canonical URL, and up to
        the first 3000 characters of the plain-text extract — or a
        human-readable message when nothing is found or the request fails.
        Never raises: all errors are reported in the return value.
    """
    from urllib.parse import quote

    search_url = "https://en.wikipedia.org/w/api.php"
    try:
        # Step 1: resolve the query to a page title.
        search_resp = _wiki_session.get(search_url, timeout=15, params={
            "action": "query",
            "list": "search",
            "srsearch": query,
            "srlimit": 3,
            "format": "json",
        })
        search_resp.raise_for_status()
        results = search_resp.json().get("query", {}).get("search", [])
        if not results:
            return "No Wikipedia articles found."
        # Step 2: fetch the plain-text content of the top result.
        title = results[0]["title"]
        # NOTE: MediaWiki boolean parameters count as TRUE whenever the
        # parameter is present, regardless of value — sending exintro=False
        # would still truncate the extract to the intro section. To get the
        # full article text, the parameter must be omitted entirely.
        content_resp = _wiki_session.get(search_url, timeout=15, params={
            "action": "query",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
            "redirects": 1,  # follow redirect titles to the real article
            "format": "json",
        })
        content_resp.raise_for_status()
        pages = content_resp.json().get("query", {}).get("pages", {})
        page = next(iter(pages.values()))
        text = page.get("extract", "")
        if not text:
            return f"Wikipedia article '{title}' has no extractable content."
        # Percent-encode the title so characters like '&', '?', or '%' still
        # form a valid URL; Wikipedia itself uses '_' for spaces.
        url = f"https://en.wikipedia.org/wiki/{quote(title.replace(' ', '_'))}"
        return f"Wikipedia: {title}\nURL: {url}\n\n{text[:3000]}"
    except Exception as e:
        # Tool boundary: surface any failure as a message instead of raising.
        return f"Wikipedia search failed: {e}"