| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
// Shared source fragments for the citation patterns below. The three regexes
// are composed from these so the author/year grammar is defined exactly once.

// Surname: one ASCII capital followed by one or more letters (ASCII or the
// À-ÿ code-point range), hyphens or apostrophes.
const _SURNAME_SRC = String.raw`[A-Z][a-zA-ZÀ-ÿ\-']+`;
// Optional co-author tail: " et al" (trailing dot optional) or " & Surname".
const _COAUTHORS_SRC = String.raw`(?:\s+et\s+al\.?|\s+&\s+${_SURNAME_SRC})?`;
// Year 19xx / 20xx with an optional lower-case suffix such as "2020a".
const _YEAR_SRC = String.raw`(?:19|20)\d{2}[a-z]?`;
// Same year, but capturing: outer group = full year (with suffix),
// inner group = the century digits ("19" or "20").
const _YEAR_CAPTURE_SRC = String.raw`((19|20)\d{2}[a-z]?)`;
// One non-capturing "Surname [et al.|& Other] Year" unit.
const _UNIT_SRC = String.raw`${_SURNAME_SRC}${_COAUTHORS_SRC}\s+${_YEAR_SRC}`;

// Narrative citation, e.g. "Smith et al. (2020)".
// Groups: 1 = first surname, 2 = year incl. suffix, 3 = century digits.
const _NARRATIVE_RE = new RegExp(
  String.raw`\b(${_SURNAME_SRC})${_COAUTHORS_SRC}\s+\(${_YEAR_CAPTURE_SRC}\)`,
  'g'
);

// Parenthetical citation list, e.g. "(Smith 2020; Jones & Lee 2019b)".
// Group 1 = everything between the brackets (units separated by ";").
const _PAREN_RE = new RegExp(
  String.raw`\((${_UNIT_SRC}(?:\s*;\s*${_UNIT_SRC})*)\)`,
  'g'
);

// A single author-year unit inside a parenthetical list.
// Groups: 1 = first surname, 2 = year incl. suffix, 3 = century digits.
// All three regexes carry the `g` flag and are therefore stateful: reset
// `lastIndex` to 0 before starting a new scan with `exec`.
const _CITE_UNIT = new RegExp(
  String.raw`(${_SURNAME_SRC})${_COAUTHORS_SRC}\s+${_YEAR_CAPTURE_SRC}`,
  'g'
);
|
|
/**
 * Resolve one author-year citation to summaries of the references it points at.
 *
 * The lookup key is `<surname lower-cased>:<year>`, e.g. `smith:2020a`.
 * Ids listed in the index that have no entry in `refsById` are dropped.
 *
 * @param {string} surname - First author's surname as it appears in the text.
 * @param {string} year - Year string, including any letter suffix.
 * @param {Object<string, Array>} [citeIndex] - Map from lookup key to a list
 *   of reference ids. A missing (null/undefined) index is treated as empty.
 * @param {Object<string, Object>} [refsById] - Map from reference id to the
 *   reference record. A missing map is treated as empty.
 * @returns {Array<{id: *, corpusMatch: *, bibcode: *, arxiv: *}>} One summary
 *   per resolved reference, in index order; empty when nothing matches.
 */
function _lookupRefs(surname, year, citeIndex, refsById) {
  const key = `${surname.toLowerCase()}:${year}`;
  // Guard both maps: callers may run before the index/reference data exists,
  // and an unresolved citation should simply yield no refs, not throw.
  const ids = (citeIndex && citeIndex[key]) || [];
  const byId = refsById || {};
  return ids
    .map((id) => byId[id])
    .filter(Boolean)
    // Expose only the fields the citation UI needs, not the whole record.
    .map(({ id, corpusMatch, bibcode, arxiv }) => ({ id, corpusMatch, bibcode, arxiv }));
}
|
|
/**
 * Split free text into plain-text and citation segments.
 *
 * Two citation shapes are recognised:
 *  - narrative, e.g. `Smith et al. (2020)`: emitted as one `cite` segment
 *    covering the whole match;
 *  - parenthetical, e.g. `(Smith 2020; Jones & Lee 2019b)`: emitted as one
 *    `cite` segment per author-year unit, with the brackets and the
 *    separators between units emitted as `text` segments.
 *
 * @param {string} text - Text to scan. Falsy input yields an empty array.
 * @param {Object<string, Array>} [citeIndex] - Map from
 *   `<surname lower-cased>:<year>` to a list of reference ids. A missing
 *   index is treated as empty, so citations are still split out but carry
 *   no refs.
 * @param {Array<Object>} [references] - Reference records, keyed by their
 *   `id` property. A missing list is treated as empty.
 * @returns {Array<{t: string, s: string, refs: (Array<Object>|undefined)}>}
 *   Segments in text order. `t` is `'text'` or `'cite'`; `cite` segments
 *   carry the `refs` resolved for that citation (possibly empty).
 */
export function splitCitations(text, citeIndex, references) {
  if (!text) return [];

  // `references` was already optional; make `citeIndex` optional too so a
  // caller without an index does not crash inside the lookup.
  const index = citeIndex || {};
  const refsById = Object.fromEntries((references || []).map((r) => [r.id, r]));

  // Every citation found, as { start, end, s, refs } plus, for parenthetical
  // lists, { inner, units } describing the individual author-year units.
  const matches = [];
  let m;

  // Pass 1: narrative citations. The regex is global (stateful), so rewind it.
  _NARRATIVE_RE.lastIndex = 0;
  while ((m = _NARRATIVE_RE.exec(text)) !== null) {
    const [whole, surname, year] = m;
    matches.push({
      start: m.index,
      end: m.index + whole.length,
      s: whole,
      refs: _lookupRefs(surname, year, index, refsById),
    });
  }

  // Pass 2: parenthetical citation lists.
  _PAREN_RE.lastIndex = 0;
  while ((m = _PAREN_RE.exec(text)) !== null) {
    // Defensive: skip a list whose opening bracket lies inside a span that
    // has already been claimed.
    const parenStart = m.index;
    const covered = matches.some((mx) => mx.start <= parenStart && parenStart < mx.end);
    if (covered) continue;

    // Break the bracket contents into author-year units; offsets are
    // relative to `inner`, not to `text`.
    const inner = m[1];
    const units = [];
    let unit;
    _CITE_UNIT.lastIndex = 0;
    while ((unit = _CITE_UNIT.exec(inner)) !== null) {
      units.push({
        s: unit[0],
        refs: _lookupRefs(unit[1], unit[2], index, refsById),
        iStart: unit.index,
        iEnd: unit.index + unit[0].length,
      });
    }

    matches.push({
      start: parenStart,
      end: parenStart + m[0].length,
      s: m[0],
      refs: units.flatMap((u) => u.refs),
      inner,
      units,
    });
  }

  // The two passes interleave in the text; restore reading order.
  matches.sort((a, b) => a.start - b.start);

  // Walk the matches left to right, emitting the plain text between them.
  const segments = [];
  let lastIndex = 0;
  for (const mx of matches) {
    if (mx.start > lastIndex) {
      segments.push({ t: 'text', s: text.slice(lastIndex, mx.start) });
    }

    if (mx.units) {
      // Parenthetical list: brackets and separators stay plain text so that
      // each unit is its own `cite` segment.
      segments.push({ t: 'text', s: '(' });
      let innerLast = 0;
      for (const u of mx.units) {
        if (u.iStart > innerLast) {
          segments.push({ t: 'text', s: mx.inner.slice(innerLast, u.iStart) });
        }
        segments.push({ t: 'cite', s: u.s, refs: u.refs });
        innerLast = u.iEnd;
      }
      if (innerLast < mx.inner.length) {
        segments.push({ t: 'text', s: mx.inner.slice(innerLast) });
      }
      segments.push({ t: 'text', s: ')' });
    } else {
      // Narrative citation: the whole match is one `cite` segment.
      segments.push({ t: 'cite', s: mx.s, refs: mx.refs });
    }

    lastIndex = mx.end;
  }

  if (lastIndex < text.length) {
    segments.push({ t: 'text', s: text.slice(lastIndex) });
  }
  return segments;
}
|
|