| """ | |
| Web Research Scanner | |
| ===================== | |
| Scans free sources for related research and collaboration opportunities. | |
| """ | |
import json
import logging
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

logger = logging.getLogger("openclaw.webscan")

class WebResearchScanner:
    """Scan public APIs for research updates."""
    def search_arxiv_related(self, topics: list[str], max_results: int = 10) -> list[dict]:
        """Search arXiv for papers related to our research topics."""
        papers = []
        for topic in topics[:3]:  # Limit to avoid rate limits
            try:
                query = urllib.parse.urlencode({
                    "search_query": f'all:"{topic}"',
                    "start": 0,
                    "max_results": max_results,
                    "sortBy": "submittedDate",
                    "sortOrder": "descending",
                })
                url = f"http://export.arxiv.org/api/query?{query}"
                req = urllib.request.Request(url, headers={"User-Agent": "OpenCLAW-Agent/1.0"})
                with urllib.request.urlopen(req, timeout=30) as resp:
                    data = resp.read().decode()
                # The arXiv API returns an Atom feed; parse each <entry>.
                root = ET.fromstring(data)
                ns = {"atom": "http://www.w3.org/2005/Atom"}
                for entry in root.findall("atom:entry", ns):
                    title_el = entry.find("atom:title", ns)
                    title = title_el.text.strip().replace("\n", " ") if title_el is not None and title_el.text else ""
                    authors = [a.find("atom:name", ns).text for a in entry.findall("atom:author", ns)]
                    # Prefer the abstract-page link over the PDF link.
                    paper_url = ""
                    for link in entry.findall("atom:link", ns):
                        if "abs" in link.get("href", ""):
                            paper_url = link.get("href")
                    papers.append({
                        "title": title,
                        "authors": authors[:3],
                        "url": paper_url,
                        "topic": topic,
                    })
            except Exception as e:
                logger.warning(f"arXiv search for '{topic}' failed: {e}")
        return papers
    def search_semantic_scholar(self, query: str, limit: int = 5) -> list[dict]:
        """Search the Semantic Scholar API (free, no key needed)."""
        papers = []
        try:
            encoded = urllib.parse.quote(query)
            url = (
                "https://api.semanticscholar.org/graph/v1/paper/search"
                f"?query={encoded}&limit={limit}&fields=title,authors,url,year"
            )
            req = urllib.request.Request(url, headers={"User-Agent": "OpenCLAW-Agent/1.0"})
            with urllib.request.urlopen(req, timeout=15) as resp:
                data = json.loads(resp.read().decode())
            for p in data.get("data", []):
                papers.append({
                    "title": p.get("title", ""),
                    "authors": [a.get("name", "") for a in p.get("authors", [])[:3]],
                    "url": p.get("url", ""),
                    "year": p.get("year"),
                })
        except Exception as e:
            logger.warning(f"Semantic Scholar search failed: {e}")
        return papers
    def search_hf_models(self, query: str, limit: int = 5) -> list[dict]:
        """Search Hugging Face for relevant models."""
        models = []
        try:
            encoded = urllib.parse.quote(query)
            url = (
                "https://huggingface.co/api/models"
                f"?search={encoded}&limit={limit}&sort=downloads&direction=-1"
            )
            req = urllib.request.Request(url, headers={"User-Agent": "OpenCLAW-Agent/1.0"})
            with urllib.request.urlopen(req, timeout=15) as resp:
                data = json.loads(resp.read().decode())
            for m in data:
                models.append({
                    "id": m.get("modelId", ""),
                    "downloads": m.get("downloads", 0),
                    "likes": m.get("likes", 0),
                    "tags": m.get("tags", [])[:5],
                })
        except Exception as e:
            logger.warning(f"HF model search failed: {e}")
        return models
    def find_potential_collaborators(self, topics: list[str]) -> list[dict]:
        """Find researchers working on similar topics via Semantic Scholar."""
        collaborators = []
        seen_names = set()
        for topic in topics[:3]:  # Limit to avoid rate limits
            papers = self.search_semantic_scholar(topic, limit=5)
            for p in papers:
                for author in p.get("authors", []):
                    name = author if isinstance(author, str) else author.get("name", "")
                    # Skip duplicates and (presumably) our own name.
                    if name and name not in seen_names and "Angulo" not in name:
                        seen_names.add(name)
                        collaborators.append({
                            "name": name,
                            "paper": p.get("title", ""),
                            "topic": topic,
                        })
        return collaborators[:20]
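
# A minimal usage sketch (an illustration added here, not part of the original
# module): it assumes network access, and the topic strings below are
# hypothetical placeholders chosen only to exercise the APIs. Each method
# catches its own exceptions and returns an empty list on failure, so this
# demo degrades gracefully offline.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    scanner = WebResearchScanner()

    topics = ["continual learning", "catastrophic forgetting"]

    for paper in scanner.search_arxiv_related(topics, max_results=3):
        print(f"[arXiv] {paper['title']} ({', '.join(filter(None, paper['authors']))})")

    for model in scanner.search_hf_models(topics[0], limit=3):
        print(f"[HF] {model['id']} ({model['downloads']} downloads)")

    for person in scanner.find_potential_collaborators(topics):
        print(f"[Collab] {person['name']} - {person['paper']}")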