Spaces:
Runtime error
Runtime error
File size: 4,954 Bytes
e92be04 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | /**
* Academic Search Service
* =======================
* Gives agents access to REAL academic papers from:
* - ArXiv (free, no API key needed)
* - Semantic Scholar (free tier, no key needed for basic search)
* - CrossRef (free, no key)
*
* Agents can now search, cite, and build upon real published research
* instead of generating hallucinated references.
*/
/**
* Search ArXiv for papers matching a query.
* @param {string} query - Search terms
* @param {number} maxResults - Max papers to return (default 10)
* @returns {Promise<Array<{title, authors, abstract, arxiv_id, url, published}>>}
*/
export async function searchArXiv(query, maxResults = 10) {
try {
const encoded = encodeURIComponent(query);
const url = `http://export.arxiv.org/api/query?search_query=all:${encoded}&start=0&max_results=${maxResults}&sortBy=relevance`;
const response = await fetch(url, { signal: AbortSignal.timeout(15000) });
const xml = await response.text();
// Parse ArXiv Atom XML
const entries = xml.split('<entry>').slice(1);
return entries.map(entry => {
const extract = (tag) => {
const match = entry.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)</${tag}>`));
return match ? match[1].trim() : '';
};
return {
title: extract('title').replace(/\\n/g, ' ').replace(/\s+/g, ' '),
authors: [...entry.matchAll(/<name>([^<]+)<\/name>/g)].map(m => m[1]).join(', '),
abstract: extract('summary').replace(/\\n/g, ' ').replace(/\s+/g, ' ').substring(0, 500),
arxiv_id: extract('id').replace('http://arxiv.org/abs/', ''),
url: extract('id'),
published: extract('published'),
source: 'arxiv'
};
});
} catch (e) {
console.error('[ACADEMIC] ArXiv search error:', e.message);
return [];
}
}
/**
* Search Semantic Scholar for papers.
* Free tier: 100 req/5min, no API key needed.
* @param {string} query
* @param {number} limit
* @returns {Promise<Array<{title, authors, abstract, paperId, url, year, citationCount}>>}
*/
export async function searchSemanticScholar(query, limit = 10) {
try {
const encoded = encodeURIComponent(query);
const url = `https://api.semanticscholar.org/graph/v1/paper/search?query=${encoded}&limit=${limit}&fields=title,authors,abstract,year,citationCount,url`;
const response = await fetch(url, {
headers: { 'Accept': 'application/json' },
signal: AbortSignal.timeout(15000)
});
const data = await response.json();
return (data.data || []).map(p => ({
title: p.title,
authors: (p.authors || []).map(a => a.name).join(', '),
abstract: (p.abstract || '').substring(0, 500),
paperId: p.paperId,
url: p.url || `https://www.semanticscholar.org/paper/${p.paperId}`,
year: p.year,
citationCount: p.citationCount || 0,
source: 'semantic_scholar'
}));
} catch (e) {
console.error('[ACADEMIC] Semantic Scholar search error:', e.message);
return [];
}
}
/**
* Search CrossRef for DOI-registered papers.
* Free, no API key needed.
*/
export async function searchCrossRef(query, limit = 10) {
try {
const encoded = encodeURIComponent(query);
const url = `https://api.crossref.org/works?query=${encoded}&rows=${limit}&select=title,author,abstract,DOI,URL,published-print`;
const response = await fetch(url, {
headers: { 'User-Agent': 'P2PCLAW/1.0 (https://p2pclaw.com; mailto:openclaw@proton.me)' },
signal: AbortSignal.timeout(15000)
});
const data = await response.json();
return (data.message?.items || []).map(item => ({
title: (item.title || [''])[0],
authors: (item.author || []).map(a => `${a.given || ''} ${a.family || ''}`).join(', '),
abstract: (item.abstract || '').replace(/<[^>]+>/g, '').substring(0, 500),
doi: item.DOI,
url: item.URL,
year: item['published-print']?.['date-parts']?.[0]?.[0],
source: 'crossref'
}));
} catch (e) {
console.error('[ACADEMIC] CrossRef search error:', e.message);
return [];
}
}
/**
* Unified search across all sources.
* Returns merged, deduplicated results ranked by relevance.
*/
export async function searchAcademic(query, maxPerSource = 5) {
const [arxiv, s2, crossref] = await Promise.allSettled([
searchArXiv(query, maxPerSource),
searchSemanticScholar(query, maxPerSource),
searchCrossRef(query, maxPerSource)
]);
const results = [
...(arxiv.status === 'fulfilled' ? arxiv.value : []),
...(s2.status === 'fulfilled' ? s2.value : []),
...(crossref.status === 'fulfilled' ? crossref.value : [])
];
return {
query,
total: results.length,
sources: {
arxiv: arxiv.status === 'fulfilled' ? arxiv.value.length : 0,
semantic_scholar: s2.status === 'fulfilled' ? s2.value.length : 0,
crossref: crossref.status === 'fulfilled' ? crossref.value.length : 0
},
results
};
}
|