// Aggregations for the supply-chain views (Sankey, disclosure, time strip).
// Pure functions over the deal rows we already loaded — no DB access here.
//
// All three views share the same input so the page only needs one Prisma query.

/** One deal row as consumed by the aggregations in this file; every column is nullable. */
interface DealRow {
  /** Buyer name, or several names separated by commas (see `splitBuyers`). */
  buyer: string | null
  provider: string | null
  modality: string | null
  date: string | null
  /** Confirmed price; takes precedence over the range fields. */
  priceUsd: number | null
  priceRangeMinUsd: number | null
  priceRangeMaxUsd: number | null
  exclusive: boolean | null
  creatorsCompensated: boolean | null
  /** JSON text, parsed on demand by the disclosure aggregation. */
  extractionMetadata: string | null
}

/** Placeholder buyer labels that do not name a real counterparty. */
const EXCLUDED_BUYERS = new Set([
  'Multiple AI labs',
  'Multiple AI Labs',
  'Multiple labs',
  'Various',
  'Various AI labs',
  'Unnamed AI firms',
  'Unnamed AI Firms',
  'undisclosed',
])

// Lower-cased view of EXCLUDED_BUYERS so that capitalisation variants such as
// "Undisclosed" or "various" are filtered without listing each one by hand.
const EXCLUDED_BUYER_KEYS = new Set(Array.from(EXCLUDED_BUYERS, (b) => b.toLowerCase()))

/**
 * Splits a comma-separated buyer field into individual buyer names.
 *
 * Names are trimmed; empty entries and placeholder labels (compared
 * case-insensitively against `EXCLUDED_BUYERS`) are dropped.
 * Note that a name which itself contains a comma is split as well.
 *
 * @param s Raw buyer column; `null` or empty yields an empty list.
 * @returns The real buyer names, in their original order.
 */
function splitBuyers(s: string | null): string[] {
  if (!s) return []
  return s
    .split(',')
    .map((b) => b.trim())
    .filter((b) => b !== '' && !EXCLUDED_BUYER_KEYS.has(b.toLowerCase()))
}

/**
 * Picks a single conservative dollar figure for a deal.
 *
 * Order of preference: the confirmed `priceUsd`, then the midpoint of the
 * disclosed range, then the range minimum on its own. A deal with only a
 * range maximum (or nothing at all) counts as 0.
 */
function pickPrice(d: DealRow): number {
  if (d.priceUsd != null) return d.priceUsd
  if (d.priceRangeMinUsd != null) {
    return d.priceRangeMaxUsd != null
      ? (d.priceRangeMinUsd + d.priceRangeMaxUsd) / 2
      : d.priceRangeMinUsd
  }
  return 0
}

/**
 * Concentration on a market side: 1 = monopoly; the value falls towards 0 as
 * deals are spread evenly over more parties (N equal parties give 1/N).
 * Computed from share of deal count, not spend (spend is sparse).
 */
function herfindahl(counts: number[]): number {
  // Sum of squared shares of `counts`: 1 when a single party holds everything,
  // 1/N when N parties hold equal counts.
  const total = counts.reduce((sum, c) => sum + c, 0)
  // Nothing to measure: report 0 instead of dividing by zero.
  if (total === 0) return 0
  return counts.reduce((sum, c) => {
    const share = c / total
    return sum + share * share
  }, 0)
}

/** Adds `amount` to the running total stored under `key`. */
function addTo(totals: Map<string, number>, key: string, amount: number): void {
  totals.set(key, (totals.get(key) ?? 0) + amount)
}

/** Accumulates a provider→buyer edge into `flows`, creating it on first sight. */
function addFlow(
  flows: Map<string, SankeyFlow>,
  provider: string,
  buyer: string,
  count: number,
  spend: number,
): void {
  // JSON-encode the pair so no character inside a name can make two edges collide.
  const key = JSON.stringify([provider, buyer])
  const flow = flows.get(key)
  if (flow) {
    flow.count += count
    flow.spend += spend
  } else {
    flows.set(key, { provider, buyer, count, spend })
  }
}

// Sankey ------------------------------------------------------------------

/** A provider or buyer node of the Sankey diagram. */
export interface SankeyNode {
  name: string
  /** Number of counted deals involving this party. */
  count: number
  /** Sum of the per-deal price over those deals. */
  spend: number
  share: number // share of total deals (0-1)
}

/** A provider→buyer edge of the Sankey diagram. */
export interface SankeyFlow {
  provider: string
  buyer: string
  count: number
  spend: number
}

export interface SankeyData {
  /** Top-N providers plus, when needed, one "Other providers" node; most deals first. */
  providers: SankeyNode[]
  /** Every buyer, most deals first. */
  buyers: SankeyNode[]
  /** Edges after provider collapsing, most deals first. */
  flows: SankeyFlow[]
  buyerHerfindahl: number
  /** Computed over the individual providers, before collapsing. */
  providerHerfindahl: number
  /** Number of input rows, including rows skipped for lacking a provider or buyer. */
  totalDeals: number
}

/**
 * Builds the provider→buyer Sankey aggregation.
 *
 * Rows without a provider, or without at least one real buyer, are skipped
 * (they still count towards `totalDeals`, the denominator of every `share`).
 * A deal with several buyers contributes its full price to each of them, so
 * buyer-side spend can add up to more than provider-side spend.
 *
 * @param deals Deal rows to aggregate.
 * @param topProvidersN How many providers keep their own node; the rest are
 *   merged into "Other providers". Values below 0 are treated as 0.
 */
export function buildSankey(deals: DealRow[], topProvidersN = 12): SankeyData {
  const OTHER_PROVIDERS = 'Other providers'

  // Maps rather than object literals: a party named "constructor" or
  // "__proto__" must not collide with Object.prototype members.
  const providerCounts = new Map<string, number>()
  const providerSpend = new Map<string, number>()
  const buyerCounts = new Map<string, number>()
  const buyerSpend = new Map<string, number>()
  const flows = new Map<string, SankeyFlow>()

  for (const d of deals) {
    if (!d.provider) continue
    // De-duplicate so a buyer listed twice on one deal is counted once.
    const buyers = Array.from(new Set(splitBuyers(d.buyer)))
    if (buyers.length === 0) continue
    const price = pickPrice(d)

    addTo(providerCounts, d.provider, 1)
    addTo(providerSpend, d.provider, price)
    for (const buyer of buyers) {
      addTo(buyerCounts, buyer, 1)
      addTo(buyerSpend, buyer, price)
      addFlow(flows, d.provider, buyer, 1, price)
    }
  }

  const totalDeals = deals.length
  const rankedProviders = Array.from(providerCounts).sort(([, a], [, b]) => b - a)
  // Clamp: a negative N would make slice() keep "all but the last |N|".
  const keep = Math.max(0, topProvidersN)
  const topProviderNames = new Set(rankedProviders.slice(0, keep).map(([name]) => name))
  const nodeNameFor = (provider: string): string =>
    topProviderNames.has(provider) ? provider : OTHER_PROVIDERS

  // Collapse providers outside the top-N into a single "Other providers" node so the
  // diagram stays legible without dropping the long tail of small deals entirely.
  const collapsedCounts = new Map<string, number>()
  const collapsedSpend = new Map<string, number>()
  for (const [name, count] of rankedProviders) {
    const node = nodeNameFor(name)
    addTo(collapsedCounts, node, count)
    addTo(collapsedSpend, node, providerSpend.get(name) ?? 0)
  }

  const collapsedFlows = new Map<string, SankeyFlow>()
  for (const flow of flows.values()) {
    addFlow(collapsedFlows, nodeNameFor(flow.provider), flow.buyer, flow.count, flow.spend)
  }

  // Nodes only exist when at least one deal was counted, so totalDeals > 0 here.
  const toNodes = (counts: Map<string, number>, spend: Map<string, number>): SankeyNode[] =>
    Array.from(counts)
      .sort(([, a], [, b]) => b - a)
      .map(([name, count]) => ({
        name,
        count,
        spend: spend.get(name) ?? 0,
        share: count / totalDeals,
      }))

  return {
    providers: toNodes(collapsedCounts, collapsedSpend),
    buyers: toNodes(buyerCounts, buyerSpend),
    flows: Array.from(collapsedFlows.values()).sort((a, b) => b.count - a.count),
    buyerHerfindahl: herfindahl(Array.from(buyerCounts.values())),
    providerHerfindahl: herfindahl(Array.from(providerCounts.values())),
    totalDeals,
  }
}

// Disclosure --------------------------------------------------------------

/** How often one kind of information is disclosed across all deals. */
export interface DisclosureBreakdown {
  field: string
  known: number
  unknown: number
  /** Fraction in the range 0-1 (known / total), 0 when there are no deals. */
  knownPercent: number
  /** Why the value is missing, most frequent reason first. */
  reasons: { reason: string; count: number }[]
}

export interface DisclosureData {
  totalDeals: number
  financial: DisclosureBreakdown
  creator: DisclosureBreakdown
  exclusivity: DisclosureBreakdown
}

/** One entry of the `missing_reasons` array inside `extractionMetadata`. */
interface MissingReason {
  field: string
  reason: string
}

/** Runtime check that a parsed JSON value has the `MissingReason` shape. */
function isMissingReason(value: unknown): value is MissingReason {
  if (typeof value !== 'object' || value === null) return false
  const candidate = value as Record<string, unknown>
  return typeof candidate.field === 'string' && typeof candidate.reason === 'string'
}

/**
 * Extracts the recorded reasons why `field` is missing from a deal.
 *
 * Best-effort: absent or unparseable metadata yields an empty list, and
 * malformed entries are skipped individually so that one bad entry does not
 * hide the valid ones next to it.
 */
function reasonsForField(deal: DealRow, field: string): string[] {
  if (!deal.extractionMetadata) return []
  let meta: unknown
  try {
    meta = JSON.parse(deal.extractionMetadata)
  } catch {
    return []
  }
  if (typeof meta !== 'object' || meta === null) return []
  const entries: unknown = (meta as { missing_reasons?: unknown }).missing_reasons
  if (!Array.isArray(entries)) return []

  const reasons: string[] = []
  for (const entry of entries as unknown[]) {
    if (isMissingReason(entry) && entry.field === field) reasons.push(entry.reason)
  }
  return reasons
}

/**
 * Counts, per kind of information, how many deals disclose it.
 *
 * Financial terms count as known when either `priceUsd` or `priceRangeMinUsd`
 * is set; for the remaining deals the first recorded reason (or
 * 'no_provenance_recorded') is tallied. Creator compensation and exclusivity
 * count as known when the flag is neither null nor undefined; no reasons are
 * collected for those two.
 */
export function buildDisclosure(deals: DealRow[]): DisclosureData {
  const total = deals.length
  let financialKnown = 0
  let creatorKnown = 0
  let exclusivityKnown = 0
  const financialReasons = new Map<string, number>()

  for (const d of deals) {
    if (d.priceUsd != null || d.priceRangeMinUsd != null) {
      financialKnown += 1
    } else {
      const reason = reasonsForField(d, 'financial_terms')[0] ?? 'no_provenance_recorded'
      addTo(financialReasons, reason, 1)
    }
    // Loose null checks, like the financial test, so undefined is "unknown" too.
    if (d.creatorsCompensated != null) creatorKnown += 1
    if (d.exclusive != null) exclusivityKnown += 1
  }

  const summarise = (
    field: string,
    known: number,
    reasons: Map<string, number> = new Map(),
  ): DisclosureBreakdown => ({
    field,
    known,
    unknown: total - known,
    knownPercent: total > 0 ? known / total : 0,
    reasons: Array.from(reasons, ([reason, count]) => ({ reason, count })).sort(
      (a, b) => b.count - a.count,
    ),
  })

  return {
    totalDeals: total,
    financial: summarise('Financial terms', financialKnown, financialReasons),
    creator: summarise('Creator compensation', creatorKnown),
    exclusivity: summarise('Exclusivity', exclusivityKnown),
  }
}

// Time series -------------------------------------------------------------

export interface TimePoint {
  // First-of-month ISO date used as the x-axis position
  date: string
  cumulativeDeals: number
  cumulativeSpend: number
}

export interface TimeSeriesData {
  /** One point per month that has at least one dated deal, oldest first. */
  points: TimePoint[]
  finalDeals: number
  finalSpend: number
  /** `date` of the first point, or '' when there are no points. */
  firstDate: string
  /** `date` of the last point, or '' when there are no points. */
  lastDate: string
}

/**
 * Year-only dates ("2024") get bucketed to mid-year so they sit between the two
 * halves of the year on the cumulative line — better than dumping them all into
 * January, which created a visible vertical step at year boundaries.
 */
function parseDealDate(s: string | null): Date | null {
  // Accepts "YYYY", "YYYY-MM" and "YYYY-MM-DD" and returns midnight UTC of the
  // resulting day; anything else — including impossible dates such as
  // "2024-13-01" or "2024-02-30" — yields null.
  if (!s) return null
  const m = /^(\d{4})(?:-(\d{2})(?:-(\d{2}))?)?$/.exec(s.trim())
  if (!m) return null

  const year = Number(m[1])
  // A bare year is placed on 1 July; a year-month on the first of that month.
  const month = m[2] === undefined ? 7 : Number(m[2])
  const day = m[3] === undefined ? 1 : Number(m[3])

  // setUTCFullYear (unlike Date.UTC) does not remap years 0-99 to 1900-1999.
  const date = new Date(0)
  date.setUTCFullYear(year, month - 1, day)

  // Out-of-range components roll over silently, so verify the round trip.
  const intact =
    date.getUTCFullYear() === year &&
    date.getUTCMonth() === month - 1 &&
    date.getUTCDate() === day
  return intact ? date : null
}

/** First-of-month ISO key ("YYYY-MM-01") for the UTC month that contains `date`. */
function monthBucketKey(date: Date): string {
  // UTC getters on purpose: parsed deal dates are midnight UTC, and the local
  // getters would shift first-of-month dates into the previous month in any
  // timezone west of UTC.
  const year = String(date.getUTCFullYear()).padStart(4, '0')
  const month = String(date.getUTCMonth() + 1).padStart(2, '0')
  return `${year}-${month}-01`
}

/**
 * Builds the cumulative deals/spend series, one point per calendar month.
 *
 * Deals whose date is missing or unparseable are left out entirely. Each
 * point carries the running totals up to and including its month; months
 * without any deal produce no point.
 *
 * @param deals Deal rows to aggregate.
 * @returns The points oldest-first plus the final totals; when no deal has a
 *   usable date, an empty series with '' as first and last date.
 */
export function buildTimeSeries(deals: DealRow[]): TimeSeriesData {
  const dated: { date: Date; price: number }[] = []
  for (const deal of deals) {
    const date = parseDealDate(deal.date)
    if (date !== null) dated.push({ date, price: pickPrice(deal) })
  }

  if (dated.length === 0) {
    return { points: [], finalDeals: 0, finalSpend: 0, firstDate: '', lastDate: '' }
  }

  dated.sort((a, b) => a.date.getTime() - b.date.getTime())

  // Bucket by month so the line steps once per month instead of once per deal.
  // The entries are already chronological, so the Map's insertion order is too.
  const monthBuckets = new Map<string, { count: number; spend: number }>()
  for (const { date, price } of dated) {
    const key = monthBucketKey(date)
    const bucket = monthBuckets.get(key)
    if (bucket) {
      bucket.count += 1
      bucket.spend += price
    } else {
      monthBuckets.set(key, { count: 1, spend: price })
    }
  }

  const points: TimePoint[] = []
  let cDeals = 0
  let cSpend = 0
  for (const [month, bucket] of monthBuckets) {
    cDeals += bucket.count
    cSpend += bucket.spend
    points.push({ date: month, cumulativeDeals: cDeals, cumulativeSpend: cSpend })
  }

  return {
    points,
    finalDeals: cDeals,
    finalSpend: cSpend,
    firstDate: points[0]?.date ?? '',
    lastDate: points[points.length - 1]?.date ?? '',
  }
}