| |
|
|
| export interface BibtexEntry { |
| key: string; |
| type: 'article' | 'inproceedings' | 'misc' | 'book' | 'phdthesis'; |
| title: string; |
| authors: string[]; |
| year?: number; |
| venue?: string; |
| volume?: string; |
| number?: string; |
| pages?: string; |
| doi?: string; |
| arxiv_id?: string; |
| url?: string; |
| abstract?: string; |
| citations?: number; |
| bibtex: string; |
| source: 'dblp' | 'semanticscholar'; |
| } |
|
|
| export interface BibtexSearchArgs { |
| query: string; |
| num?: number; |
| year?: number; |
| author?: string; |
| } |
|
|
| |
| function generateKey(title: string, year?: number): string { |
| const words = title.toLowerCase().split(/\s+/); |
| const firstWord = words[0]?.replace(/[^a-z]/g, '') || 'unknown'; |
| return `${firstWord}${year || ''}`; |
| } |
|
|
| |
| function formatAuthorsForBibtex(authors: string[]): string { |
| return authors.join(' and '); |
| } |
|
|
| |
| function escapeBibtex(str: string): string { |
| return str |
| .replace(/&/g, '\\&') |
| .replace(/%/g, '\\%') |
| .replace(/_/g, '\\_') |
| .replace(/\$/g, '\\$') |
| .replace(/#/g, '\\#'); |
| } |
|
|
| |
| function generateBibtexString(entry: Partial<BibtexEntry>): string { |
| const fields: string[] = []; |
|
|
| if (entry.title) fields.push(` title = {${escapeBibtex(entry.title)}}`); |
| if (entry.authors && entry.authors.length > 0) { |
| fields.push(` author = {${formatAuthorsForBibtex(entry.authors)}}`); |
| } |
| if (entry.year) fields.push(` year = {${entry.year}}`); |
| if (entry.venue) { |
| const venueField = entry.type === 'inproceedings' ? 'booktitle' : 'journal'; |
| fields.push(` ${venueField} = {${escapeBibtex(entry.venue)}}`); |
| } |
| if (entry.volume) fields.push(` volume = {${entry.volume}}`); |
| if (entry.number) fields.push(` number = {${entry.number}}`); |
| if (entry.pages) fields.push(` pages = {${entry.pages}}`); |
| if (entry.doi) fields.push(` doi = {${entry.doi}}`); |
| if (entry.url) fields.push(` url = {${entry.url}}`); |
| if (entry.arxiv_id) { |
| fields.push(` eprint = {${entry.arxiv_id}}`); |
| fields.push(` archivePrefix = {arXiv}`); |
| } |
|
|
| const type = entry.type || 'misc'; |
| const key = entry.key || generateKey(entry.title || 'unknown', entry.year); |
|
|
| return `@${type}{${key},\n${fields.join(',\n')}\n}`; |
| } |
|
|
| |
| export async function searchDblp(args: BibtexSearchArgs): Promise<BibtexEntry[]> { |
| const { query, num = 10, year, author } = args; |
|
|
| |
| let searchQuery = query; |
| if (author) { |
| searchQuery = `${searchQuery} ${author}`; |
| } |
|
|
| const params = new URLSearchParams({ |
| q: searchQuery, |
| format: 'json', |
| h: String(Math.min(num * 2, 100)), |
| }); |
|
|
| try { |
| const response = await fetch(`https://dblp.org/search/publ/api?${params}`, { |
| headers: { 'Accept': 'application/json' }, |
| signal: AbortSignal.timeout(5000), |
| }); |
|
|
| if (!response.ok) { |
| return []; |
| } |
|
|
| const data = await response.json() as any; |
| const hits = data?.result?.hits?.hit || []; |
|
|
| const results: BibtexEntry[] = []; |
|
|
| for (const hit of hits) { |
| const info = hit.info; |
| if (!info) continue; |
|
|
| |
| const pubYear = info.year ? parseInt(info.year) : undefined; |
| if (year && pubYear && pubYear < year) continue; |
|
|
| |
| let authors: string[] = []; |
| if (info.authors?.author) { |
| const authorList = Array.isArray(info.authors.author) |
| ? info.authors.author |
| : [info.authors.author]; |
| authors = authorList.map((a: any) => typeof a === 'string' ? a : a.text || a._); |
| } |
|
|
| |
| let type: BibtexEntry['type'] = 'misc'; |
| if (info.type === 'Conference and Workshop Papers') { |
| type = 'inproceedings'; |
| } else if (info.type === 'Journal Articles') { |
| type = 'article'; |
| } else if (info.type === 'Books and Theses') { |
| type = 'book'; |
| } |
|
|
| const entry: Partial<BibtexEntry> = { |
| type, |
| title: info.title?.replace(/\.$/, '') || '', |
| authors, |
| year: pubYear, |
| venue: info.venue, |
| volume: info.volume, |
| number: info.number, |
| pages: info.pages, |
| doi: info.doi, |
| url: info.ee || info.url, |
| source: 'dblp', |
| }; |
|
|
| entry.key = generateKey(entry.title!, entry.year); |
| entry.bibtex = generateBibtexString(entry); |
|
|
| results.push(entry as BibtexEntry); |
|
|
| if (results.length >= num) break; |
| } |
|
|
| return results; |
| } catch (error) { |
| |
| return []; |
| } |
| } |
|
|
| |
| export async function searchSemanticScholar(args: BibtexSearchArgs): Promise<BibtexEntry[]> { |
| const { query, num = 10, year } = args; |
|
|
| const params = new URLSearchParams({ |
| query, |
| limit: String(Math.min(num * 2, 100)), |
| fields: 'title,authors,year,venue,externalIds,abstract,citationCount,url', |
| }); |
|
|
| if (year) { |
| params.set('year', `${year}-`); |
| } |
|
|
| try { |
| const response = await fetch(`https://api.semanticscholar.org/graph/v1/paper/search?${params}`, { |
| headers: { 'Accept': 'application/json' }, |
| signal: AbortSignal.timeout(5000), |
| }); |
|
|
| if (!response.ok) { |
| return []; |
| } |
|
|
| const data = await response.json() as any; |
| const papers = data?.data || []; |
|
|
| const results: BibtexEntry[] = []; |
|
|
| for (const paper of papers) { |
| if (!paper.title) continue; |
|
|
| |
| const authors = (paper.authors || []).map((a: any) => a.name).filter(Boolean); |
|
|
| |
| const externalIds = paper.externalIds || {}; |
| const doi = externalIds.DOI; |
| const arxivId = externalIds.ArXiv; |
|
|
| |
| const type: BibtexEntry['type'] = paper.venue?.toLowerCase().includes('conference') |
| ? 'inproceedings' |
| : 'article'; |
|
|
| const entry: Partial<BibtexEntry> = { |
| type, |
| title: paper.title, |
| authors, |
| year: paper.year, |
| venue: paper.venue, |
| doi, |
| arxiv_id: arxivId, |
| url: paper.url, |
| abstract: paper.abstract, |
| citations: paper.citationCount, |
| source: 'semanticscholar', |
| }; |
|
|
| entry.key = generateKey(entry.title!, entry.year); |
| entry.bibtex = generateBibtexString(entry); |
|
|
| results.push(entry as BibtexEntry); |
|
|
| if (results.length >= num) break; |
| } |
|
|
| return results; |
| } catch (error) { |
| |
| return []; |
| } |
| } |
|
|
| |
| function normalizeDoi(doi: string): string { |
| return doi.toLowerCase() |
| .replace(/^https?:\/\/doi\.org\//i, '') |
| .replace(/^doi:/i, '') |
| .trim(); |
| } |
|
|
| |
| function similarity(a: string, b: string): number { |
| const wordsA = new Set(a.toLowerCase().split(/\s+/).filter(w => w.length > 2)); |
| const wordsB = new Set(b.toLowerCase().split(/\s+/).filter(w => w.length > 2)); |
|
|
| if (wordsA.size === 0 || wordsB.size === 0) return 0; |
|
|
| let intersection = 0; |
| for (const word of wordsA) { |
| if (wordsB.has(word)) intersection++; |
| } |
|
|
| return intersection / Math.max(wordsA.size, wordsB.size); |
| } |
|
|
| |
| export function deduplicateResults(results: BibtexEntry[]): BibtexEntry[] { |
| const seen = new Map<string, BibtexEntry>(); |
| const seenDois = new Map<string, string>(); |
| const seenArxiv = new Map<string, string>(); |
|
|
| for (const entry of results) { |
| |
| if (entry.doi) { |
| const normalizedDoi = normalizeDoi(entry.doi); |
| if (seenDois.has(normalizedDoi)) { |
| |
| const existingKey = seenDois.get(normalizedDoi)!; |
| const existing = seen.get(existingKey)!; |
| mergeEntries(existing, entry); |
| continue; |
| } |
| seenDois.set(normalizedDoi, entry.key); |
| } |
|
|
| |
| if (entry.arxiv_id) { |
| const normalizedArxiv = entry.arxiv_id.replace(/v\d+$/, ''); |
| if (seenArxiv.has(normalizedArxiv)) { |
| const existingKey = seenArxiv.get(normalizedArxiv)!; |
| const existing = seen.get(existingKey)!; |
| mergeEntries(existing, entry); |
| continue; |
| } |
| seenArxiv.set(normalizedArxiv, entry.key); |
| } |
|
|
| |
| let isDuplicate = false; |
| for (const [key, existing] of seen) { |
| if (similarity(entry.title, existing.title) > 0.85 && |
| entry.year === existing.year) { |
| mergeEntries(existing, entry); |
| isDuplicate = true; |
| break; |
| } |
| } |
|
|
| if (!isDuplicate) { |
| seen.set(entry.key, entry); |
| } |
| } |
|
|
| |
| return Array.from(seen.values()).sort((a, b) => { |
| if (a.year && b.year) { |
| if (a.year !== b.year) return b.year - a.year; |
| } |
| return a.title.localeCompare(b.title); |
| }); |
| } |
|
|
| |
| function mergeEntries(target: BibtexEntry, source: BibtexEntry): void { |
| |
| if (source.abstract && (!target.abstract || source.abstract.length > target.abstract.length)) { |
| target.abstract = source.abstract; |
| } |
|
|
| |
| if (source.citations && (!target.citations || source.citations > target.citations)) { |
| target.citations = source.citations; |
| } |
|
|
| |
| if (!target.doi && source.doi) target.doi = source.doi; |
| if (!target.arxiv_id && source.arxiv_id) target.arxiv_id = source.arxiv_id; |
| if (!target.url && source.url) target.url = source.url; |
| if (!target.volume && source.volume) target.volume = source.volume; |
| if (!target.pages && source.pages) target.pages = source.pages; |
|
|
| |
| target.bibtex = generateBibtexString(target); |
| } |
|
|
| |
| export async function searchBibtex(args: BibtexSearchArgs): Promise<BibtexEntry[]> { |
| const { num = 10 } = args; |
|
|
| |
| const [dblpResults, s2Results] = await Promise.all([ |
| searchDblp(args), |
| searchSemanticScholar(args), |
| ]); |
|
|
| |
| const combined = [...dblpResults, ...s2Results]; |
| const deduplicated = deduplicateResults(combined); |
|
|
| |
| return deduplicated.slice(0, num); |
| } |
|
|