| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import type { BriefingBlock, Citation, ClaimPart } from '$lib/types/claim'; |
| import { tierForDocId, type Tier } from '$lib/types/tier'; |
|
|
/**
 * The four canonical briefing sections, listed in numeric order.
 *
 * - `key`     short identifier for the section
 * - `label`   display label used for the section head
 * - `n`       two-digit section number (`'01'`–`'04'`)
 * - `tier`    evidence tier attached to the section head, where one applies
 * - `aliases` lower-case heading titles that resolve to this section
 */
const CANONICAL_SECTIONS: Array<{ key: string; label: string; n: string; tier?: Tier; aliases: string[] }> = [
  {
    key: 'status',
    label: 'Status',
    n: '01',
    aliases: ['status']
  },
  {
    key: 'empirical',
    label: 'Empirical evidence',
    n: '02',
    tier: 'empirical',
    aliases: ['empirical evidence', 'empirical']
  },
  {
    key: 'modeled',
    label: 'Modeled scenarios',
    n: '03',
    tier: 'modeled',
    aliases: ['modeled scenarios', 'modeled']
  },
  {
    key: 'policy',
    label: 'Policy context',
    n: '04',
    aliases: ['policy context', 'policy']
  }
];
|
|
/**
 * Resolve a raw heading title to its canonical section entry.
 *
 * The title is lower-cased, a trailing run of `.` / `:` characters (and any
 * whitespace after it) is removed, and the remainder is trimmed before it is
 * compared against each section's aliases.
 *
 * @param rawTitle Heading text as captured from the markdown.
 * @returns The first canonical section whose aliases contain the normalized
 *   title, or `undefined` when no section matches.
 */
function findSection(rawTitle: string) {
  const normalized = rawTitle
    .toLowerCase()
    .replace(/[.:]+\s*$/, '')
    .trim();

  for (const section of CANONICAL_SECTIONS) {
    if (section.aliases.includes(normalized)) {
      return section;
    }
  }
  return undefined;
}
|
|
| |
| |
/**
 * Matches a section heading at the start of the input or right after a newline
 * (leading whitespace allowed). Two heading forms are accepted:
 *
 * - Bold:  `**Title.**` — group 2 is the title (capital first letter, then
 *   letters, whitespace or `/`), without the trailing period.
 * - Hash:  one to three `#`, a number `01`–`04` (group 3), an optional single
 *   separator (`:`, `-`, `—` or `.`), then the rest of the line (group 4).
 *
 * Group 1 is the newline that preceded the heading, or the empty string at the
 * start of the input. The regex is global, so `exec` is stateful: reset
 * `lastIndex` before starting a fresh scan.
 */
const SECTION_HEAD_RE = /(^|\n)\s*(?:\*\*([A-Z][A-Za-z\s/]+?)\.\s*\*\*|#{1,3}\s*(0[1-4])\s*[:\-—.]?\s*([^\n]+))/g;
|
|
/** Result of parsing a briefing's markdown. */
export interface ParseResult {
  /** Head and prose blocks, in the order they were produced from the markdown. */
  blocks: BriefingBlock[];
  /** Citation records keyed by doc id: the known citations plus any created while parsing. */
  citations: Record<string, Citation>;
  /** Doc ids cited in the text for which a citation record had to be created during parsing. */
  unresolvedDocIds: string[];
}
|
|
| |
| |
| |
| |
| |
/**
 * Build a `Citation` for a doc id, filling any field missing from `meta` with
 * a fallback.
 *
 * Fallbacks: `source` is the part of the doc id before its first `_` or `-`,
 * upper-cased; `title` is the doc id itself; `url`, `vintage` and `retrieved`
 * are empty strings. The tier always comes from `tierForDocId(docId)`.
 *
 * @param n Citation number to assign.
 * @param docId Document id; also used as the citation's `id`.
 * @param meta Optional partial metadata that overrides the fallbacks.
 * @returns The assembled citation.
 */
export function citationFromMeta(
  n: number,
  docId: string,
  meta?: Partial<Pick<Citation, 'source' | 'title' | 'url' | 'vintage' | 'retrieved'>>
): Citation {
  const overrides: NonNullable<typeof meta> = meta ?? {};

  return {
    id: docId,
    n,
    tier: tierForDocId(docId),
    // Only derive the source from the doc id when no explicit source is given.
    source: overrides.source ?? docId.split(/[_-]/)[0].toUpperCase(),
    title: overrides.title ?? docId,
    docId,
    url: overrides.url ?? '',
    vintage: overrides.vintage ?? '',
    retrieved: overrides.retrieved ?? ''
  };
}
|
|
/**
 * Matches an inline citation: `[` + one or more comma-separated doc ids + `]`.
 *
 * Each id starts with a letter and continues with letters, digits or
 * underscores; matching is case-insensitive. Group 1 holds the raw
 * comma-separated id list (whitespace around commas included).
 */
const CITE_RE = /\[([a-z][a-z0-9_]*(?:\s*,\s*[a-z][a-z0-9_]*)*)\]/gi;
|
|
/**
 * Split text into sentences.
 *
 * A boundary is a run of whitespace that follows `.`, `!` or `?` and is
 * followed by an upper-case ASCII letter or `(`. The whitespace is dropped;
 * the punctuation stays with the preceding sentence. Pieces that are empty or
 * whitespace-only are discarded. Note that abbreviations such as `U.S.`
 * followed by a capitalized word are split as well.
 *
 * @param text Text to split.
 * @returns The non-blank sentence pieces, in order.
 */
function splitSentences(text: string): string[] {
  const sentences: string[] = [];
  for (const piece of text.split(/(?<=[.!?])\s+(?=[A-Z(])/g)) {
    if (piece.trim() !== '') {
      sentences.push(piece);
    }
  }
  return sentences;
}
|
|
/**
 * Split one sentence into claim parts around its inline `[doc_id, ...]`
 * citations.
 *
 * Each citation closes a part: the text between the previous citation (or the
 * sentence start) and this one becomes a part tagged with the tier and doc id
 * of the FIRST id in the bracket. Every id in the bracket is registered in
 * `cites` if it is not there yet. Non-blank text after the last citation is
 * returned as an untagged part. A sentence with no citation is returned as a
 * single untagged part.
 *
 * @param sentence Sentence text to split.
 * @param cites Citation registry keyed by doc id; mutated in place when a new
 *   id is encountered.
 * @param registerCite Factory invoked once for each doc id missing from `cites`.
 * @returns The parts in sentence order.
 */
function parseSentenceParts(
  sentence: string,
  cites: Record<string, Citation>,
  registerCite: (docId: string) => Citation
): ClaimPart[] {
  const matches = [...sentence.matchAll(CITE_RE)];
  if (matches.length === 0) {
    return [{ text: sentence }];
  }

  const parts: ClaimPart[] = [];
  let cursor = 0;
  for (const m of matches) {
    const matchStart = m.index ?? 0;
    const before = sentence.slice(cursor, matchStart);
    const docIds = m[1].split(/\s*,\s*/).filter(Boolean);
    cursor = matchStart + m[0].length;

    // The part carries only the first id of the bracket; the remaining ids are
    // still registered below so they appear in the citation registry.
    parts.push({ text: before, tier: tierForDocId(docIds[0]), cite: docIds[0] });

    for (const id of docIds) {
      // Own-property lookup: a plain `cites[id]` would see inherited members
      // (e.g. `constructor`, `toString`) and wrongly treat such ids as
      // already registered.
      const existing = Object.prototype.hasOwnProperty.call(cites, id) ? cites[id] : undefined;
      if (!existing) cites[id] = registerCite(id);
    }
  }

  if (cursor < sentence.length) {
    const tail = sentence.slice(cursor);
    if (tail.trim()) parts.push({ text: tail });
  }
  return parts;
}
|
|
| |
| |
| |
| |
/**
 * Parse briefing markdown into head/prose blocks and collect its citations.
 *
 * Section headings are located with `SECTION_HEAD_RE`. Each section with a
 * non-empty body yields one `head` block followed by one `prose` block per
 * blank-line-separated paragraph. Text that precedes the first recognized
 * heading is not emitted when at least one section produces output. If no
 * block is produced at all (no recognized heading, or only empty sections)
 * and the markdown is not blank, the whole input is emitted as a single
 * `01 / Status` section.
 *
 * @param markdown Briefing text.
 * @param knownCitations Citations already known, keyed by doc id. They are
 *   copied into the result; numbering of newly created citations continues
 *   after the highest `n` found here.
 * @returns The blocks, the merged citation registry, and the doc ids for which
 *   a citation record had to be created.
 */
export function parseBriefing(
  markdown: string,
  knownCitations: Record<string, Citation> = {}
): ParseResult {
  const cites: Record<string, Citation> = { ...knownCitations };
  let nextN = Object.values(cites).reduce((max, c) => Math.max(max, c.n), 0) + 1;
  const unresolvedDocIds = new Set<string>();
  const registerCite = (docId: string): Citation => {
    // Own-property lookup so ids that collide with Object.prototype members
    // (e.g. `constructor`) are still reported as unresolved.
    const isKnown =
      Object.prototype.hasOwnProperty.call(knownCitations, docId) && Boolean(knownCitations[docId]);
    if (!isKnown) unresolvedDocIds.add(docId);
    return citationFromMeta(nextN++, docId);
  };

  // Turn one whitespace-flattened paragraph into claim parts: sentences are
  // parsed individually and joined by single-space spacer parts; trailing
  // blank, untiered parts are trimmed off.
  const toProseParts = (flat: string): ClaimPart[] => {
    const parts: ClaimPart[] = [];
    for (const sentence of splitSentences(flat)) {
      parts.push(...parseSentenceParts(sentence, cites, registerCite));
      parts.push({ text: ' ' });
    }
    while (parts.length && parts[parts.length - 1].text.trim() === '' && !parts[parts.length - 1].tier) {
      parts.pop();
    }
    return parts;
  };

  const blocks: BriefingBlock[] = [];

  // Pass 1: record where every recognized section heading starts and where
  // its body begins.
  type Idx = { num: string; label: string; tier?: Tier; titleExtra?: string; start: number; bodyStart: number };
  const indices: Idx[] = [];
  let m: RegExpExecArray | null;
  SECTION_HEAD_RE.lastIndex = 0; // global regex: always scan from the beginning
  while ((m = SECTION_HEAD_RE.exec(markdown))) {
    if (m[2] !== undefined) {
      // Bold heading (`**Title.**`): kept only when the title is a known alias.
      const sec = findSection(m[2]);
      if (!sec) continue;
      indices.push({
        num: sec.n,
        label: sec.label,
        tier: sec.tier,
        start: m.index + m[1].length,
        bodyStart: m.index + m[0].length
      });
    } else if (m[3] !== undefined) {
      // Numbered hash heading (`## 02 Title`): the number selects the section;
      // a title that differs from the canonical label is kept as extra text.
      const num = m[3];
      const title = (m[4] ?? '').trim();
      const sec = CANONICAL_SECTIONS.find((s) => s.n === num) ?? findSection(title);
      indices.push({
        num,
        label: sec?.label ?? title,
        tier: sec?.tier,
        titleExtra: sec && title.toLowerCase() !== sec.label.toLowerCase() ? title : undefined,
        start: m.index + m[1].length,
        bodyStart: m.index + m[0].length
      });
    }
  }

  // Pass 2: each section's body runs up to the next heading (or end of input).
  for (let i = 0; i < indices.length; i++) {
    const sec = indices[i];
    const next = indices[i + 1];
    const body = markdown.slice(sec.bodyStart, next ? next.start : markdown.length).trim();
    if (!body) continue;

    blocks.push({
      kind: 'head',
      n: sec.num,
      label: sec.label,
      tier: sec.tier,
      title: sec.titleExtra
    });

    for (const para of body.split(/\n\s*\n/)) {
      const flat = para.replace(/\s+/g, ' ').trim();
      if (!flat) continue;

      const parts = toProseParts(flat);
      if (parts.length) blocks.push({ kind: 'prose', parts });
    }
  }

  // Fallback: nothing was emitted, so treat the entire input as one
  // "Status" section with a single prose block.
  if (blocks.length === 0 && markdown.trim()) {
    blocks.push({ kind: 'head', n: '01', label: 'Status' });
    const parts = toProseParts(markdown.replace(/\s+/g, ' ').trim());
    if (parts.length) blocks.push({ kind: 'prose', parts });
  }

  return { blocks, citations: cites, unresolvedDocIds: [...unresolvedDocIds] };
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Escape the characters that are significant in HTML text and double-quoted
 * attribute values: `&`, `<`, `>` and `"`.
 *
 * @param s Raw text.
 * @returns `s` with those four characters replaced by their HTML entities.
 */
function escapeHtml(s: string): string {
  return s
    // `&` must be handled first so the entities produced below are not re-escaped.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
|
|