File size: 5,355 Bytes
aa5a2db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
Web Research Scanner
=====================
Scans free sources for related research and collaboration opportunities.
"""
import json
import logging
import urllib.request
import urllib.parse
from typing import Optional
from datetime import datetime

# Module-level logger registered under the "openclaw.webscan" name; the scanner
# methods below report failed lookups through it as warnings.
logger = logging.getLogger("openclaw.webscan")


class WebResearchScanner:
    """Scan public APIs for research updates.

    All ``search_*`` methods are best-effort: any network, HTTP or parsing
    failure is logged as a warning and the method returns what it has (usually
    an empty list) instead of raising.
    """

    # Sent with every request so the remote services can identify the client.
    USER_AGENT = "OpenCLAW-Agent/1.0"
    # Only the first few topics are queried, to stay clear of API rate limits.
    MAX_TOPICS = 3
    # Number of author names kept per paper.
    MAX_AUTHORS = 3
    # Number of tags kept per Hugging Face model.
    MAX_TAGS = 5
    # Upper bound on the list returned by find_potential_collaborators().
    MAX_COLLABORATORS = 20
    # Namespace map for the Atom feed returned by the arXiv API.
    _ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}

    def _fetch(self, url: str, timeout: float) -> bytes:
        """Perform a GET request for *url* and return the raw response body."""
        request = urllib.request.Request(url, headers={"User-Agent": self.USER_AGENT})
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            return resp.read()

    @staticmethod
    def _clean_text(node) -> str:
        """Return the whitespace-normalised text of an XML element, or ""."""
        if node is None or node.text is None:
            return ""
        # split()/join collapses line breaks *and* the indentation that follows
        # them, which a plain replace("\n", " ") leaves behind as double spaces.
        return " ".join(node.text.split())

    @classmethod
    def _parse_arxiv_feed(cls, feed, topic: str) -> list[dict]:
        """Convert an arXiv Atom feed (``str`` or ``bytes``) into paper dicts.

        Entries without a title are skipped. Raises
        ``xml.etree.ElementTree.ParseError`` if *feed* is not well-formed XML.
        """
        # Function-scope import: the module header does not import ElementTree.
        import xml.etree.ElementTree as ET

        ns = cls._ATOM_NS
        papers = []
        for entry in ET.fromstring(feed).findall("atom:entry", ns):
            title = cls._clean_text(entry.find("atom:title", ns))
            if not title:
                continue

            names = (
                cls._clean_text(author.find("atom:name", ns))
                for author in entry.findall("atom:author", ns)
            )
            authors = [name for name in names if name]

            # Keep the last link whose href points at the abstract page.
            paper_url = ""
            for link in entry.findall("atom:link", ns):
                href = link.get("href", "")
                if "abs" in href:
                    paper_url = href

            papers.append({
                "title": title,
                "authors": authors[: cls.MAX_AUTHORS],
                "url": paper_url,
                "topic": topic,
            })
        return papers

    @classmethod
    def _parse_semantic_scholar(cls, payload) -> list[dict]:
        """Convert a decoded Semantic Scholar search response into paper dicts.

        JSON ``null`` values for title, url, authors or an author name are
        normalised to empty values. Raises ``ValueError`` if *payload* is not
        a JSON object.
        """
        if not isinstance(payload, dict):
            raise ValueError(f"unexpected response type: {type(payload).__name__}")

        papers = []
        for item in payload.get("data") or []:
            if not isinstance(item, dict):
                continue
            raw_authors = (item.get("authors") or [])[: cls.MAX_AUTHORS]
            papers.append({
                "title": item.get("title") or "",
                "authors": [
                    (a.get("name") or "") if isinstance(a, dict) else ""
                    for a in raw_authors
                ],
                "url": item.get("url") or "",
                "year": item.get("year"),
            })
        return papers

    @classmethod
    def _parse_hf_models(cls, payload) -> list[dict]:
        """Convert a decoded Hugging Face model-search response into model dicts.

        Raises ``ValueError`` if *payload* is not a JSON array (for example an
        error object).
        """
        if not isinstance(payload, list):
            raise ValueError(f"unexpected response type: {type(payload).__name__}")

        models = []
        for item in payload:
            if not isinstance(item, dict):
                continue
            models.append({
                # NOTE(review): falls back to "id" when "modelId" is absent;
                # both keys are presumed to carry the repo id - confirm
                # against the Hub API.
                "id": item.get("modelId") or item.get("id") or "",
                "downloads": item.get("downloads") or 0,
                "likes": item.get("likes") or 0,
                "tags": (item.get("tags") or [])[: cls.MAX_TAGS],
            })
        return models

    def search_arxiv_related(self, topics: list[str], max_results: int = 10) -> list[dict]:
        """Search ArXiv for papers related to our research topics.

        Args:
            topics: Search phrases; only the first ``MAX_TOPICS`` are queried.
            max_results: Maximum number of entries requested *per topic*.

        Returns:
            A list of dicts with keys ``title``, ``authors`` (at most
            ``MAX_AUTHORS`` names), ``url`` and ``topic``, newest submissions
            first within each topic. A topic whose request or parse fails
            contributes nothing and is logged as a warning.
        """
        papers = []

        for topic in topics[: self.MAX_TOPICS]:  # Limit to avoid rate limits
            try:
                query = urllib.parse.urlencode({
                    "search_query": f'all:"{topic}"',
                    "start": 0,
                    "max_results": max_results,
                    "sortBy": "submittedDate",
                    "sortOrder": "descending",
                })
                url = f"http://export.arxiv.org/api/query?{query}"
                feed = self._fetch(url, timeout=30)
                papers.extend(self._parse_arxiv_feed(feed, topic))
            except Exception as e:  # best-effort boundary: log and move on
                logger.warning("ArXiv search for '%s' failed: %s", topic, e)

        return papers

    def search_semantic_scholar(self, query: str, limit: int = 5) -> list[dict]:
        """Search Semantic Scholar API (free, no key needed).

        Args:
            query: Free-text search string.
            limit: Maximum number of papers requested.

        Returns:
            A list of dicts with keys ``title``, ``authors`` (at most
            ``MAX_AUTHORS`` names), ``url`` and ``year``; empty on failure.
        """
        try:
            encoded = urllib.parse.quote(query)
            url = (
                "https://api.semanticscholar.org/graph/v1/paper/search"
                f"?query={encoded}&limit={limit}&fields=title,authors,url,year"
            )
            return self._parse_semantic_scholar(json.loads(self._fetch(url, timeout=15)))
        except Exception as e:  # best-effort boundary: log and return nothing
            logger.warning("Semantic Scholar search failed: %s", e)
            return []

    def search_hf_models(self, query: str, limit: int = 5) -> list[dict]:
        """Search Hugging Face for relevant models.

        Args:
            query: Free-text search string.
            limit: Maximum number of models requested.

        Returns:
            A list of dicts with keys ``id``, ``downloads``, ``likes`` and
            ``tags`` (at most ``MAX_TAGS``), requested in descending download
            order; empty on failure.
        """
        try:
            encoded = urllib.parse.quote(query)
            url = (
                "https://huggingface.co/api/models"
                f"?search={encoded}&limit={limit}&sort=downloads&direction=-1"
            )
            return self._parse_hf_models(json.loads(self._fetch(url, timeout=15)))
        except Exception as e:  # best-effort boundary: log and return nothing
            logger.warning("HF model search failed: %s", e)
            return []

    def find_potential_collaborators(
        self,
        topics: list[str],
        exclude: tuple[str, ...] = ("Angulo",),
    ) -> list[dict]:
        """Find researchers working on similar topics via Semantic Scholar.

        Args:
            topics: Search phrases; only the first ``MAX_TOPICS`` are queried.
            exclude: Substrings identifying authors to leave out. Any author
                whose name contains one of them is skipped. The default
                preserves the original hard-coded ``"Angulo"`` filter.

        Returns:
            Up to ``MAX_COLLABORATORS`` dicts with keys ``name``, ``paper`` and
            ``topic``, one per distinct author name, in discovery order.
        """
        collaborators = []
        seen_names = set()

        for topic in topics[: self.MAX_TOPICS]:
            for paper in self.search_semantic_scholar(topic, limit=5):
                for author in paper.get("authors") or []:
                    if isinstance(author, str):
                        name = author
                    elif isinstance(author, dict):
                        name = author.get("name") or ""
                    else:
                        # e.g. a JSON null author entry - nothing usable.
                        name = ""

                    if not name or name in seen_names:
                        continue
                    if any(fragment in name for fragment in exclude):
                        continue

                    seen_names.add(name)
                    collaborators.append({
                        "name": name,
                        "paper": paper.get("title", ""),
                        "topic": topic,
                    })
                    # Stop as soon as the cap is hit so no further HTTP
                    # requests are issued for results that would be discarded.
                    if len(collaborators) >= self.MAX_COLLABORATORS:
                        return collaborators

        return collaborators