Spaces:
Sleeping
Sleeping
Add filtering, sorting, and grouping to linkages page and implement comprehensive model enrichment pipeline
Browse files- app/analytics/page.tsx +9 -9
- app/api/models/enrich-web/route.ts +103 -0
- app/components/Sidebar.tsx +5 -12
- app/components/Tooltip.tsx +1 -1
- app/components/deals/DealFeed.tsx +4 -4
- app/components/deals/DiscoveryButton.tsx +1 -1
- app/components/linkages/CreateLinkagesButton.tsx +1 -1
- app/components/models/IngestModelsButton.tsx +1 -1
- app/components/models/TokenCalculationCard.tsx +1 -1
- app/components/models/TokenEstimateTooltip.tsx +1 -1
- app/deals/DealModal.tsx +3 -3
- app/deals/DealsClient.tsx +15 -15
- app/deals/[id]/page.tsx +16 -16
- app/globals.css +19 -19
- app/linkages/LinkagesClient.tsx +622 -0
- app/linkages/page.tsx +86 -238
- app/models/[id]/page.tsx +16 -16
- app/models/page.tsx +15 -17
- app/normalization/page.tsx +6 -6
- app/page.tsx +5 -5
- app/timeline/page.tsx +2 -2
- registry/enrich_all_models.py +207 -0
- registry/enrichment/__init__.py +4 -0
- registry/enrichment/comprehensive_enrichment.py +370 -0
- registry/enrichment/llm_extractor.py +227 -0
- registry/enrichment/web_enrichment.py +220 -0
- registry/evidence_profile.py +64 -0
- registry/ingest_priority_models.py +85 -12
- registry/requirements.txt +3 -0
app/analytics/page.tsx
CHANGED
|
@@ -104,15 +104,15 @@ export default async function AnalyticsPage() {
|
|
| 104 |
return (
|
| 105 |
<div className="min-h-screen bg-background">
|
| 106 |
<div className="container-content section-padding">
|
| 107 |
-
<div className="mb-
|
| 108 |
-
<h1 className="text-
|
| 109 |
-
<p className="text-text-muted text-
|
| 110 |
Market structure, concentration, and trends
|
| 111 |
</p>
|
| 112 |
</div>
|
| 113 |
|
| 114 |
{/* Key Stats */}
|
| 115 |
-
<div className="grid grid-cols-1 md:grid-cols-4 gap-
|
| 116 |
<div className="stat-card">
|
| 117 |
<div className="stat-value">{analytics.totalDeals}</div>
|
| 118 |
<div className="stat-label">Total Deals</div>
|
|
@@ -132,8 +132,8 @@ export default async function AnalyticsPage() {
|
|
| 132 |
</div>
|
| 133 |
|
| 134 |
{/* Modality Breakdown */}
|
| 135 |
-
<div className="card mb-
|
| 136 |
-
<h2 className="text-
|
| 137 |
<div className="space-y-4">
|
| 138 |
{Object.entries(analytics.modalityCounts)
|
| 139 |
.sort(([, a], [, b]) => b - a)
|
|
@@ -152,9 +152,9 @@ export default async function AnalyticsPage() {
|
|
| 152 |
</div>
|
| 153 |
|
| 154 |
{/* Top Buyers */}
|
| 155 |
-
<div className="grid md:grid-cols-2 gap-
|
| 156 |
<div className="card">
|
| 157 |
-
<h2 className="text-
|
| 158 |
<div className="space-y-4">
|
| 159 |
{analytics.topBuyers.map((buyer, idx) => (
|
| 160 |
<div key={buyer.name} className="flex items-center justify-between py-2">
|
|
@@ -173,7 +173,7 @@ export default async function AnalyticsPage() {
|
|
| 173 |
|
| 174 |
{/* Top Providers */}
|
| 175 |
<div className="card">
|
| 176 |
-
<h2 className="text-
|
| 177 |
<div className="space-y-4">
|
| 178 |
{analytics.topProviders.map((provider, idx) => (
|
| 179 |
<div key={provider.name} className="flex items-center justify-between py-2">
|
|
|
|
| 104 |
return (
|
| 105 |
<div className="min-h-screen bg-background">
|
| 106 |
<div className="container-content section-padding">
|
| 107 |
+
<div className="mb-4">
|
| 108 |
+
<h1 className="text-3xl font-semibold mb-1">Market Analytics</h1>
|
| 109 |
+
<p className="text-text-muted text-sm">
|
| 110 |
Market structure, concentration, and trends
|
| 111 |
</p>
|
| 112 |
</div>
|
| 113 |
|
| 114 |
{/* Key Stats */}
|
| 115 |
+
<div className="grid grid-cols-1 md:grid-cols-4 gap-4 mb-8">
|
| 116 |
<div className="stat-card">
|
| 117 |
<div className="stat-value">{analytics.totalDeals}</div>
|
| 118 |
<div className="stat-label">Total Deals</div>
|
|
|
|
| 132 |
</div>
|
| 133 |
|
| 134 |
{/* Modality Breakdown */}
|
| 135 |
+
<div className="card mb-6">
|
| 136 |
+
<h2 className="text-xl font-semibold mb-4">Deals by Modality</h2>
|
| 137 |
<div className="space-y-4">
|
| 138 |
{Object.entries(analytics.modalityCounts)
|
| 139 |
.sort(([, a], [, b]) => b - a)
|
|
|
|
| 152 |
</div>
|
| 153 |
|
| 154 |
{/* Top Buyers */}
|
| 155 |
+
<div className="grid md:grid-cols-2 gap-4 mb-6">
|
| 156 |
<div className="card">
|
| 157 |
+
<h2 className="text-xl font-semibold mb-4">Top Buyers by Spend</h2>
|
| 158 |
<div className="space-y-4">
|
| 159 |
{analytics.topBuyers.map((buyer, idx) => (
|
| 160 |
<div key={buyer.name} className="flex items-center justify-between py-2">
|
|
|
|
| 173 |
|
| 174 |
{/* Top Providers */}
|
| 175 |
<div className="card">
|
| 176 |
+
<h2 className="text-xl font-semibold mb-4">Top Providers by Spend</h2>
|
| 177 |
<div className="space-y-4">
|
| 178 |
{analytics.topProviders.map((provider, idx) => (
|
| 179 |
<div key={provider.name} className="flex items-center justify-between py-2">
|
app/api/models/enrich-web/route.ts
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { NextResponse } from 'next/server'
|
| 2 |
+
import { exec } from 'child_process'
|
| 3 |
+
import { promisify } from 'util'
|
| 4 |
+
import path from 'path'
|
| 5 |
+
import { existsSync } from 'fs'
|
| 6 |
+
|
| 7 |
+
const execAsync = promisify(exec)
|
| 8 |
+
|
| 9 |
+
export const dynamic = 'force-dynamic'
|
| 10 |
+
export const maxDuration = 600 // 10 minutes for web enrichment
|
| 11 |
+
|
| 12 |
+
/**
 * POST /api/models/enrich-web - Enrich models using web search and LLM extraction
 *
 * Runs `registry/enrich_all_models.py` as a child process and returns its
 * captured output together with any counts that could be parsed from it.
 *
 * Optional query params:
 * - limit: number of models to enrich (default: all); must be a plain
 *   non-negative integer, otherwise the request is rejected with 400
 * - no_web: disable web search (default: false)
 * - no_llm: disable LLM extraction (default: false)
 *
 * Responses:
 * - 200: summary object (stdout, stderr, flags, parsed counts when present)
 * - 400: `limit` is not a plain non-negative integer
 * - 500: the enrichment command failed (the error message is returned)
 */
export async function POST(request: Request) {
  try {
    const { searchParams } = new URL(request.url)
    const limit = searchParams.get('limit')
    const noWeb = searchParams.get('no_web') === 'true'
    const noLlm = searchParams.get('no_llm') === 'true'

    // SECURITY: `limit` comes straight from the request URL and is spliced into
    // a shell command below. Accept digits only so that shell metacharacters
    // (`;`, `|`, `$()`, backticks, whitespace, ...) can never reach the shell.
    if (limit && !/^\d+$/.test(limit)) {
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid "limit" query parameter: expected a non-negative integer',
          timestamp: new Date().toISOString(),
        },
        { status: 400 }
      )
    }

    // Path to the enrichment script
    const enrichScript = path.join(process.cwd(), 'registry', 'enrich_all_models.py')
    // Interpreter precedence: explicit PYTHON_PATH, then the project venv, then whatever is on PATH
    const venvPython = path.join(process.cwd(), 'venv', 'bin', 'python3')
    const pythonPath = process.env.PYTHON_PATH || (existsSync(venvPython) ? venvPython : 'python3')

    console.log(`Starting web enrichment: limit=${limit || 'all'}, web=${!noWeb}, llm=${!noLlm}`)

    // Build command. Every dynamic piece is either a fixed flag or the
    // digits-only `limit` validated above.
    // NOTE(review): paths are not quoted, so a working directory containing
    // spaces would break the command; switching to `execFile` with an argument
    // array would remove the shell entirely (needs a new import).
    let command = `${pythonPath} ${enrichScript}`
    if (limit) {
      command += ` --limit ${limit}`
    }
    if (noWeb) {
      command += ` --no-web`
    }
    if (noLlm) {
      command += ` --no-llm`
    }

    const { stdout, stderr } = await execAsync(command, {
      cwd: process.cwd(),
      timeout: 540000, // 9 minutes timeout
      env: {
        ...process.env,
        PYTHONUNBUFFERED: '1',
      },
    })

    // Parse output to extract summary
    const summary: {
      success: boolean
      limit?: number
      no_web: boolean
      no_llm: boolean
      output: string
      errors: string
      timestamp: string
      models_enriched?: number
      errors_count?: number
    } = {
      success: true,
      no_web: noWeb,
      no_llm: noLlm,
      output: stdout,
      errors: stderr,
      timestamp: new Date().toISOString(),
    }

    if (limit) {
      summary.limit = parseInt(limit, 10)
    }

    // Try to extract numbers from output; the counts are simply omitted when
    // the script did not print the expected lines.
    const enrichedMatch = stdout.match(/Successfully enriched: (\d+)\/(\d+)/)
    const errorsMatch = stdout.match(/Errors: (\d+)/)

    if (enrichedMatch) {
      summary.models_enriched = parseInt(enrichedMatch[1], 10)
    }
    if (errorsMatch) {
      summary.errors_count = parseInt(errorsMatch[1], 10)
    }

    return NextResponse.json(summary)
  } catch (error: any) {
    console.error('Web enrichment error:', error)
    return NextResponse.json(
      {
        success: false,
        error: error.message || 'Failed to enrich models with web search',
        timestamp: new Date().toISOString(),
      },
      { status: 500 }
    )
  }
}
|
| 103 |
+
|
app/components/Sidebar.tsx
CHANGED
|
@@ -10,37 +10,31 @@ export default function Sidebar() {
|
|
| 10 |
{
|
| 11 |
href: '/',
|
| 12 |
label: 'Deals',
|
| 13 |
-
icon: 'π',
|
| 14 |
},
|
| 15 |
{
|
| 16 |
href: '/timeline',
|
| 17 |
label: 'Timeline',
|
| 18 |
-
icon: 'π
',
|
| 19 |
},
|
| 20 |
{
|
| 21 |
href: '/models',
|
| 22 |
label: 'Models',
|
| 23 |
-
icon: 'π€',
|
| 24 |
},
|
| 25 |
{
|
| 26 |
href: '/linkages',
|
| 27 |
label: 'Linkages',
|
| 28 |
-
icon: 'π',
|
| 29 |
},
|
| 30 |
{
|
| 31 |
href: '/analytics',
|
| 32 |
label: 'Analytics',
|
| 33 |
-
icon: 'π',
|
| 34 |
},
|
| 35 |
]
|
| 36 |
|
| 37 |
return (
|
| 38 |
<aside className="w-64 bg-surface border-r border-border flex-shrink-0 min-h-screen sticky top-0">
|
| 39 |
-
<div className="p-
|
| 40 |
-
<h2 className="text-lg font-semibold
|
| 41 |
-
<p className="text-xs text-text-muted">Deals Dashboard</p>
|
| 42 |
</div>
|
| 43 |
-
<nav className="p-
|
| 44 |
<ul className="space-y-1">
|
| 45 |
{navItems.map((item) => {
|
| 46 |
const isActive = pathname === item.href
|
|
@@ -48,14 +42,13 @@ export default function Sidebar() {
|
|
| 48 |
<li key={item.href}>
|
| 49 |
<Link
|
| 50 |
href={item.href}
|
| 51 |
-
className={`
|
| 52 |
isActive
|
| 53 |
? 'bg-accent/10 text-accent font-medium'
|
| 54 |
: 'text-text-muted hover:bg-border-subtle hover:text-text'
|
| 55 |
}`}
|
| 56 |
>
|
| 57 |
-
|
| 58 |
-
<span>{item.label}</span>
|
| 59 |
</Link>
|
| 60 |
</li>
|
| 61 |
)
|
|
|
|
| 10 |
{
|
| 11 |
href: '/',
|
| 12 |
label: 'Deals',
|
|
|
|
| 13 |
},
|
| 14 |
{
|
| 15 |
href: '/timeline',
|
| 16 |
label: 'Timeline',
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
href: '/models',
|
| 20 |
label: 'Models',
|
|
|
|
| 21 |
},
|
| 22 |
{
|
| 23 |
href: '/linkages',
|
| 24 |
label: 'Linkages',
|
|
|
|
| 25 |
},
|
| 26 |
{
|
| 27 |
href: '/analytics',
|
| 28 |
label: 'Analytics',
|
|
|
|
| 29 |
},
|
| 30 |
]
|
| 31 |
|
| 32 |
return (
|
| 33 |
<aside className="w-64 bg-surface border-r border-border flex-shrink-0 min-h-screen sticky top-0">
|
| 34 |
+
<div className="p-4 border-b border-border">
|
| 35 |
+
<h2 className="text-lg font-semibold">AI Training Data</h2>
|
|
|
|
| 36 |
</div>
|
| 37 |
+
<nav className="p-2">
|
| 38 |
<ul className="space-y-1">
|
| 39 |
{navItems.map((item) => {
|
| 40 |
const isActive = pathname === item.href
|
|
|
|
| 42 |
<li key={item.href}>
|
| 43 |
<Link
|
| 44 |
href={item.href}
|
| 45 |
+
className={`px-3 py-2 rounded-none text-sm transition-colors ${
|
| 46 |
isActive
|
| 47 |
? 'bg-accent/10 text-accent font-medium'
|
| 48 |
: 'text-text-muted hover:bg-border-subtle hover:text-text'
|
| 49 |
}`}
|
| 50 |
>
|
| 51 |
+
{item.label}
|
|
|
|
| 52 |
</Link>
|
| 53 |
</li>
|
| 54 |
)
|
app/components/Tooltip.tsx
CHANGED
|
@@ -33,7 +33,7 @@ export default function Tooltip({
|
|
| 33 |
{children}
|
| 34 |
{isVisible && (
|
| 35 |
<div
|
| 36 |
-
className={`absolute z-50 ${positionClasses[position]} w-64 p-3 bg-surface border border-border rounded-
|
| 37 |
>
|
| 38 |
{content}
|
| 39 |
{/* Arrow */}
|
|
|
|
| 33 |
{children}
|
| 34 |
{isVisible && (
|
| 35 |
<div
|
| 36 |
+
className={`absolute z-50 ${positionClasses[position]} w-64 p-3 bg-surface border border-border rounded-none shadow-lg text-xs text-text leading-relaxed pointer-events-none`}
|
| 37 |
>
|
| 38 |
{content}
|
| 39 |
{/* Arrow */}
|
app/components/deals/DealFeed.tsx
CHANGED
|
@@ -104,17 +104,17 @@ function FeedItemCard({ item }: { item: FeedItem }) {
|
|
| 104 |
const isArticle = item.type === 'article' || (!isTwitter && item.url.startsWith('http'))
|
| 105 |
|
| 106 |
return (
|
| 107 |
-
<div className="border border-border-subtle rounded-
|
| 108 |
<div className="flex items-start justify-between gap-3">
|
| 109 |
<div className="flex-1 min-w-0">
|
| 110 |
<div className="flex items-center gap-2 mb-2">
|
| 111 |
{isTwitter && (
|
| 112 |
-
<span className="text-[10px] px-1.5 py-0.5 bg-blue-500/20 text-blue-500 rounded-
|
| 113 |
TWITTER
|
| 114 |
</span>
|
| 115 |
)}
|
| 116 |
{isArticle && (
|
| 117 |
-
<span className="text-[10px] px-1.5 py-0.5 bg-accent/20 text-accent rounded-
|
| 118 |
ARTICLE
|
| 119 |
</span>
|
| 120 |
)}
|
|
@@ -192,7 +192,7 @@ function FeedItemCard({ item }: { item: FeedItem }) {
|
|
| 192 |
<div className="mt-3 pt-3 border-t border-border-subtle">
|
| 193 |
<iframe
|
| 194 |
src={item.url}
|
| 195 |
-
className="w-full h-64 border border-border-subtle rounded-
|
| 196 |
title={item.title || 'Article preview'}
|
| 197 |
sandbox="allow-same-origin allow-scripts"
|
| 198 |
/>
|
|
|
|
| 104 |
const isArticle = item.type === 'article' || (!isTwitter && item.url.startsWith('http'))
|
| 105 |
|
| 106 |
return (
|
| 107 |
+
<div className="border border-border-subtle rounded-none p-4 hover:border-border transition-colors">
|
| 108 |
<div className="flex items-start justify-between gap-3">
|
| 109 |
<div className="flex-1 min-w-0">
|
| 110 |
<div className="flex items-center gap-2 mb-2">
|
| 111 |
{isTwitter && (
|
| 112 |
+
<span className="text-[10px] px-1.5 py-0.5 bg-blue-500/20 text-blue-500 rounded-none font-mono">
|
| 113 |
TWITTER
|
| 114 |
</span>
|
| 115 |
)}
|
| 116 |
{isArticle && (
|
| 117 |
+
<span className="text-[10px] px-1.5 py-0.5 bg-accent/20 text-accent rounded-none font-mono">
|
| 118 |
ARTICLE
|
| 119 |
</span>
|
| 120 |
)}
|
|
|
|
| 192 |
<div className="mt-3 pt-3 border-t border-border-subtle">
|
| 193 |
<iframe
|
| 194 |
src={item.url}
|
| 195 |
+
className="w-full h-64 border border-border-subtle rounded-none"
|
| 196 |
title={item.title || 'Article preview'}
|
| 197 |
sandbox="allow-same-origin allow-scripts"
|
| 198 |
/>
|
app/components/deals/DiscoveryButton.tsx
CHANGED
|
@@ -69,7 +69,7 @@ export default function DiscoveryButton() {
|
|
| 69 |
</button>
|
| 70 |
|
| 71 |
{status && (
|
| 72 |
-
<div className="absolute top-full right-0 mt-2 z-50 bg-surface border border-border rounded-
|
| 73 |
<div className="text-xs font-semibold mb-2 text-text">{status}</div>
|
| 74 |
{results && (
|
| 75 |
<div className="text-xs text-text-muted space-y-1">
|
|
|
|
| 69 |
</button>
|
| 70 |
|
| 71 |
{status && (
|
| 72 |
+
<div className="absolute top-full right-0 mt-2 z-50 bg-surface border border-border rounded-none shadow-lg p-3 min-w-[300px]">
|
| 73 |
<div className="text-xs font-semibold mb-2 text-text">{status}</div>
|
| 74 |
{results && (
|
| 75 |
<div className="text-xs text-text-muted space-y-1">
|
app/components/linkages/CreateLinkagesButton.tsx
CHANGED
|
@@ -54,7 +54,7 @@ export default function CreateLinkagesButton() {
|
|
| 54 |
</button>
|
| 55 |
|
| 56 |
{status && (
|
| 57 |
-
<div className="absolute top-full right-0 mt-2 z-50 bg-surface border border-border rounded-
|
| 58 |
<div className="text-xs font-semibold mb-2 text-text">{status}</div>
|
| 59 |
{results && (
|
| 60 |
<div className="text-xs text-text-muted space-y-1">
|
|
|
|
| 54 |
</button>
|
| 55 |
|
| 56 |
{status && (
|
| 57 |
+
<div className="absolute top-full right-0 mt-2 z-50 bg-surface border border-border rounded-none shadow-lg p-3 min-w-[300px]">
|
| 58 |
<div className="text-xs font-semibold mb-2 text-text">{status}</div>
|
| 59 |
{results && (
|
| 60 |
<div className="text-xs text-text-muted space-y-1">
|
app/components/models/IngestModelsButton.tsx
CHANGED
|
@@ -78,7 +78,7 @@ export default function IngestModelsButton() {
|
|
| 78 |
</button>
|
| 79 |
|
| 80 |
{status && (
|
| 81 |
-
<div className="absolute top-full right-0 mt-2 z-50 bg-surface border border-border rounded-
|
| 82 |
<div className="text-xs font-semibold mb-2 text-text">{status}</div>
|
| 83 |
{results && (
|
| 84 |
<div className="text-xs text-text-muted space-y-1">
|
|
|
|
| 78 |
</button>
|
| 79 |
|
| 80 |
{status && (
|
| 81 |
+
<div className="absolute top-full right-0 mt-2 z-50 bg-surface border border-border rounded-none shadow-lg p-3 min-w-[300px]">
|
| 82 |
<div className="text-xs font-semibold mb-2 text-text">{status}</div>
|
| 83 |
{results && (
|
| 84 |
<div className="text-xs text-text-muted space-y-1">
|
app/components/models/TokenCalculationCard.tsx
CHANGED
|
@@ -52,7 +52,7 @@ export default function TokenCalculationCard({
|
|
| 52 |
|
| 53 |
<div className="pt-3 border-t border-border-subtle">
|
| 54 |
<strong className="text-text">Step-by-Step Calculation:</strong>
|
| 55 |
-
<div className="mt-2 space-y-2 font-mono text-xs bg-[rgba(139,111,71,0.05)] p-3 rounded-
|
| 56 |
<div>
|
| 57 |
<span className="text-text-muted">Min estimate:</span> {paramsAbs.toLocaleString()} params Γ {ratioMin} = {tokensEstMin ? (tokensEstMin / 1e9).toFixed(1) : 'β'}B tokens
|
| 58 |
</div>
|
|
|
|
| 52 |
|
| 53 |
<div className="pt-3 border-t border-border-subtle">
|
| 54 |
<strong className="text-text">Step-by-Step Calculation:</strong>
|
| 55 |
+
<div className="mt-2 space-y-2 font-mono text-xs bg-[rgba(139,111,71,0.05)] p-3 rounded-none">
|
| 56 |
<div>
|
| 57 |
<span className="text-text-muted">Min estimate:</span> {paramsAbs.toLocaleString()} params Γ {ratioMin} = {tokensEstMin ? (tokensEstMin / 1e9).toFixed(1) : 'β'}B tokens
|
| 58 |
</div>
|
app/components/models/TokenEstimateTooltip.tsx
CHANGED
|
@@ -45,7 +45,7 @@ export default function TokenEstimateTooltip({
|
|
| 45 |
|
| 46 |
{isOpen && (
|
| 47 |
<div
|
| 48 |
-
className="absolute z-50 left-0 top-full mt-2 w-80 bg-surface border border-border rounded-
|
| 49 |
onClick={(e) => e.stopPropagation()}
|
| 50 |
>
|
| 51 |
<div className="font-semibold text-text mb-2">Token Estimate Calculation</div>
|
|
|
|
| 45 |
|
| 46 |
{isOpen && (
|
| 47 |
<div
|
| 48 |
+
className="absolute z-50 left-0 top-full mt-2 w-80 bg-surface border border-border rounded-none shadow-lg p-4 text-xs"
|
| 49 |
onClick={(e) => e.stopPropagation()}
|
| 50 |
>
|
| 51 |
<div className="font-semibold text-text mb-2">Token Estimate Calculation</div>
|
app/deals/DealModal.tsx
CHANGED
|
@@ -51,11 +51,11 @@ export default function DealModal({ deal, isOpen, onClose }: DealModalProps) {
|
|
| 51 |
onClick={onClose}
|
| 52 |
>
|
| 53 |
<div
|
| 54 |
-
className="bg-surface rounded-
|
| 55 |
onClick={(e) => e.stopPropagation()}
|
| 56 |
>
|
| 57 |
{/* Header */}
|
| 58 |
-
<div className="sticky top-0 bg-surface border-b border-border px-
|
| 59 |
<div className="flex-1">
|
| 60 |
<h2 className="text-2xl font-semibold mb-1">
|
| 61 |
{deal.provider} β {deal.buyer}
|
|
@@ -249,7 +249,7 @@ export default function DealModal({ deal, isOpen, onClose }: DealModalProps) {
|
|
| 249 |
<h3 className="text-lg font-semibold mb-4">Pricing Normalizations</h3>
|
| 250 |
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
|
| 251 |
{normalizations.map((norm, idx) => (
|
| 252 |
-
<div key={idx} className="border border-border-subtle rounded-
|
| 253 |
<div className="text-xs text-text-muted mb-1">Per {norm.unitType}</div>
|
| 254 |
<div className="font-medium">
|
| 255 |
{norm.normalizedCostPerUnit < 0.001
|
|
|
|
| 51 |
onClick={onClose}
|
| 52 |
>
|
| 53 |
<div
|
| 54 |
+
className="bg-surface rounded-none shadow-xl max-w-4xl w-full max-h-[90vh] overflow-y-auto"
|
| 55 |
onClick={(e) => e.stopPropagation()}
|
| 56 |
>
|
| 57 |
{/* Header */}
|
| 58 |
+
<div className="sticky top-0 bg-surface border-b border-border px-4 py-3 flex items-start justify-between">
|
| 59 |
<div className="flex-1">
|
| 60 |
<h2 className="text-2xl font-semibold mb-1">
|
| 61 |
{deal.provider} β {deal.buyer}
|
|
|
|
| 249 |
<h3 className="text-lg font-semibold mb-4">Pricing Normalizations</h3>
|
| 250 |
<div className="grid grid-cols-2 md:grid-cols-4 gap-4">
|
| 251 |
{normalizations.map((norm, idx) => (
|
| 252 |
+
<div key={idx} className="border border-border-subtle rounded-none p-3">
|
| 253 |
<div className="text-xs text-text-muted mb-1">Per {norm.unitType}</div>
|
| 254 |
<div className="font-medium">
|
| 255 |
{norm.normalizedCostPerUnit < 0.001
|
app/deals/DealsClient.tsx
CHANGED
|
@@ -391,7 +391,7 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 391 |
<thead>
|
| 392 |
<tr className="bg-border-subtle">
|
| 393 |
<th
|
| 394 |
-
className="cursor-pointer hover:bg-border select-none
|
| 395 |
onClick={() => handleSort('provider')}
|
| 396 |
title="Click to sort by provider"
|
| 397 |
>
|
|
@@ -508,7 +508,7 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 508 |
onClick={() => toggleGroup(groupKey)}
|
| 509 |
className="cursor-pointer bg-border-subtle hover:bg-border transition-colors"
|
| 510 |
>
|
| 511 |
-
<td colSpan={7}
|
| 512 |
<div className="flex items-center justify-between">
|
| 513 |
<div className="flex items-center gap-3">
|
| 514 |
<span className="text-xs text-text-muted">{isExpanded ? 'βΌ' : 'βΆ'}</span>
|
|
@@ -539,14 +539,14 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 539 |
onClick={() => handleDealClick(deal)}
|
| 540 |
className="cursor-pointer transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 541 |
>
|
| 542 |
-
<td className="
|
| 543 |
<div className="flex items-center gap-2">
|
| 544 |
<div className="font-medium text-accent hover:text-accent-hover">
|
| 545 |
{deal.provider}
|
| 546 |
</div>
|
| 547 |
{deal.discoveredVia === 'exa' && (
|
| 548 |
<span
|
| 549 |
-
className="text-[10px] px-1.5 py-0.5 bg-accent/20 text-accent rounded-
|
| 550 |
title={`Source: Exa${deal.exaQuery ? ` (${deal.exaQuery})` : ''}${deal.exaScore ? ` - Score: ${deal.exaScore.toFixed(2)}` : ''}`}
|
| 551 |
>
|
| 552 |
ARTICLE
|
|
@@ -554,7 +554,7 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 554 |
)}
|
| 555 |
{deal.discoveredVia && deal.discoveredVia !== 'exa' && (
|
| 556 |
<span
|
| 557 |
-
className="text-[10px] px-1.5 py-0.5 bg-border-subtle text-text-muted rounded-
|
| 558 |
title={`Discovered via ${deal.discoveredVia}`}
|
| 559 |
>
|
| 560 |
{deal.discoveredVia.toUpperCase()}
|
|
@@ -562,10 +562,10 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 562 |
)}
|
| 563 |
</div>
|
| 564 |
</td>
|
| 565 |
-
<td
|
| 566 |
<div className="text-sm">{deal.buyer}</div>
|
| 567 |
</td>
|
| 568 |
-
<td
|
| 569 |
<span className="badge badge-secondary text-xs">{deal.modality}</span>
|
| 570 |
</td>
|
| 571 |
<td className="px-4 py-3 text-right">
|
|
@@ -589,7 +589,7 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 589 |
<span className="text-text-muted/40 text-xs">β</span>
|
| 590 |
)}
|
| 591 |
</td>
|
| 592 |
-
<td
|
| 593 |
<div className="text-sm text-text-muted/80">{formatDate(deal.date)}</div>
|
| 594 |
</td>
|
| 595 |
</tr>
|
|
@@ -605,14 +605,14 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 605 |
onClick={() => handleDealClick(deal)}
|
| 606 |
className="cursor-pointer transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 607 |
>
|
| 608 |
-
<td
|
| 609 |
<div className="flex items-center gap-2">
|
| 610 |
<div className="font-medium text-accent hover:text-accent-hover">
|
| 611 |
{deal.provider}
|
| 612 |
</div>
|
| 613 |
{deal.discoveredVia === 'exa' && (
|
| 614 |
<span
|
| 615 |
-
className="text-[10px] px-1.5 py-0.5 bg-accent/20 text-accent rounded-
|
| 616 |
title={`Source: Exa${deal.exaQuery ? ` (${deal.exaQuery})` : ''}${deal.exaScore ? ` - Score: ${deal.exaScore.toFixed(2)}` : ''}`}
|
| 617 |
>
|
| 618 |
ARTICLE
|
|
@@ -620,7 +620,7 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 620 |
)}
|
| 621 |
{deal.discoveredVia && deal.discoveredVia !== 'exa' && (
|
| 622 |
<span
|
| 623 |
-
className="text-[10px] px-1.5 py-0.5 bg-border-subtle text-text-muted rounded-
|
| 624 |
title={`Discovered via ${deal.discoveredVia}`}
|
| 625 |
>
|
| 626 |
{deal.discoveredVia.toUpperCase()}
|
|
@@ -628,10 +628,10 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 628 |
)}
|
| 629 |
</div>
|
| 630 |
</td>
|
| 631 |
-
<td
|
| 632 |
<div className="text-sm">{deal.buyer}</div>
|
| 633 |
</td>
|
| 634 |
-
<td
|
| 635 |
<span className="badge badge-secondary text-xs">{deal.modality}</span>
|
| 636 |
</td>
|
| 637 |
<td className="px-4 py-3 text-right">
|
|
@@ -655,7 +655,7 @@ export default function DealsClient({ initialDeals }: DealsClientProps) {
|
|
| 655 |
<span className="text-text-muted/40 text-xs">β</span>
|
| 656 |
)}
|
| 657 |
</td>
|
| 658 |
-
<td
|
| 659 |
<div className="text-sm text-text-muted/80">{formatDate(deal.date)}</div>
|
| 660 |
</td>
|
| 661 |
</tr>
|
|
@@ -712,7 +712,7 @@ function PriceCellWithTooltip({ deal }: { deal: Deal }) {
|
|
| 712 |
</div>
|
| 713 |
</div>
|
| 714 |
{showTooltip && (
|
| 715 |
-
<div className="absolute right-0 top-full mt-2 z-50 bg-surface border border-border rounded-
|
| 716 |
<div className="text-xs font-semibold mb-2 text-text">Normalized Pricing</div>
|
| 717 |
<div className="space-y-2">
|
| 718 |
{normalizations.map((norm, idx) => (
|
|
|
|
| 391 |
<thead>
|
| 392 |
<tr className="bg-border-subtle">
|
| 393 |
<th
|
| 394 |
+
className="cursor-pointer hover:bg-border select-none"
|
| 395 |
onClick={() => handleSort('provider')}
|
| 396 |
title="Click to sort by provider"
|
| 397 |
>
|
|
|
|
| 508 |
onClick={() => toggleGroup(groupKey)}
|
| 509 |
className="cursor-pointer bg-border-subtle hover:bg-border transition-colors"
|
| 510 |
>
|
| 511 |
+
<td colSpan={7}>
|
| 512 |
<div className="flex items-center justify-between">
|
| 513 |
<div className="flex items-center gap-3">
|
| 514 |
<span className="text-xs text-text-muted">{isExpanded ? 'βΌ' : 'βΆ'}</span>
|
|
|
|
| 539 |
onClick={() => handleDealClick(deal)}
|
| 540 |
className="cursor-pointer transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 541 |
>
|
| 542 |
+
<td className="pl-6">
|
| 543 |
<div className="flex items-center gap-2">
|
| 544 |
<div className="font-medium text-accent hover:text-accent-hover">
|
| 545 |
{deal.provider}
|
| 546 |
</div>
|
| 547 |
{deal.discoveredVia === 'exa' && (
|
| 548 |
<span
|
| 549 |
+
className="text-[10px] px-1.5 py-0.5 bg-accent/20 text-accent rounded-none font-mono"
|
| 550 |
title={`Source: Exa${deal.exaQuery ? ` (${deal.exaQuery})` : ''}${deal.exaScore ? ` - Score: ${deal.exaScore.toFixed(2)}` : ''}`}
|
| 551 |
>
|
| 552 |
ARTICLE
|
|
|
|
| 554 |
)}
|
| 555 |
{deal.discoveredVia && deal.discoveredVia !== 'exa' && (
|
| 556 |
<span
|
| 557 |
+
className="text-[10px] px-1.5 py-0.5 bg-border-subtle text-text-muted rounded-none"
|
| 558 |
title={`Discovered via ${deal.discoveredVia}`}
|
| 559 |
>
|
| 560 |
{deal.discoveredVia.toUpperCase()}
|
|
|
|
| 562 |
)}
|
| 563 |
</div>
|
| 564 |
</td>
|
| 565 |
+
<td>
|
| 566 |
<div className="text-sm">{deal.buyer}</div>
|
| 567 |
</td>
|
| 568 |
+
<td>
|
| 569 |
<span className="badge badge-secondary text-xs">{deal.modality}</span>
|
| 570 |
</td>
|
| 571 |
<td className="px-4 py-3 text-right">
|
|
|
|
| 589 |
<span className="text-text-muted/40 text-xs">β</span>
|
| 590 |
)}
|
| 591 |
</td>
|
| 592 |
+
<td>
|
| 593 |
<div className="text-sm text-text-muted/80">{formatDate(deal.date)}</div>
|
| 594 |
</td>
|
| 595 |
</tr>
|
|
|
|
| 605 |
onClick={() => handleDealClick(deal)}
|
| 606 |
className="cursor-pointer transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 607 |
>
|
| 608 |
+
<td>
|
| 609 |
<div className="flex items-center gap-2">
|
| 610 |
<div className="font-medium text-accent hover:text-accent-hover">
|
| 611 |
{deal.provider}
|
| 612 |
</div>
|
| 613 |
{deal.discoveredVia === 'exa' && (
|
| 614 |
<span
|
| 615 |
+
className="text-[10px] px-1.5 py-0.5 bg-accent/20 text-accent rounded-none font-mono"
|
| 616 |
title={`Source: Exa${deal.exaQuery ? ` (${deal.exaQuery})` : ''}${deal.exaScore ? ` - Score: ${deal.exaScore.toFixed(2)}` : ''}`}
|
| 617 |
>
|
| 618 |
ARTICLE
|
|
|
|
| 620 |
)}
|
| 621 |
{deal.discoveredVia && deal.discoveredVia !== 'exa' && (
|
| 622 |
<span
|
| 623 |
+
className="text-[10px] px-1.5 py-0.5 bg-border-subtle text-text-muted rounded-none"
|
| 624 |
title={`Discovered via ${deal.discoveredVia}`}
|
| 625 |
>
|
| 626 |
{deal.discoveredVia.toUpperCase()}
|
|
|
|
| 628 |
)}
|
| 629 |
</div>
|
| 630 |
</td>
|
| 631 |
+
<td>
|
| 632 |
<div className="text-sm">{deal.buyer}</div>
|
| 633 |
</td>
|
| 634 |
+
<td>
|
| 635 |
<span className="badge badge-secondary text-xs">{deal.modality}</span>
|
| 636 |
</td>
|
| 637 |
<td className="px-4 py-3 text-right">
|
|
|
|
| 655 |
<span className="text-text-muted/40 text-xs">β</span>
|
| 656 |
)}
|
| 657 |
</td>
|
| 658 |
+
<td>
|
| 659 |
<div className="text-sm text-text-muted/80">{formatDate(deal.date)}</div>
|
| 660 |
</td>
|
| 661 |
</tr>
|
|
|
|
| 712 |
</div>
|
| 713 |
</div>
|
| 714 |
{showTooltip && (
|
| 715 |
+
<div className="absolute right-0 top-full mt-2 z-50 bg-surface border border-border rounded-none shadow-lg p-3 min-w-[280px]">
|
| 716 |
<div className="text-xs font-semibold mb-2 text-text">Normalized Pricing</div>
|
| 717 |
<div className="space-y-2">
|
| 718 |
{normalizations.map((norm, idx) => (
|
app/deals/[id]/page.tsx
CHANGED
|
@@ -62,13 +62,13 @@ export default async function DealDetailPage({
|
|
| 62 |
|
| 63 |
<div className="max-w-4xl">
|
| 64 |
{/* Header Card */}
|
| 65 |
-
<div className="card mb-
|
| 66 |
-
<div className="flex items-start justify-between mb-
|
| 67 |
<div>
|
| 68 |
-
<h1 className="text-
|
| 69 |
{deal.provider} β {deal.buyer}
|
| 70 |
</h1>
|
| 71 |
-
<p className="text-text-muted">{deal.dataType}</p>
|
| 72 |
</div>
|
| 73 |
<div className="flex gap-2">
|
| 74 |
<span className="badge badge-secondary">{deal.modality}</span>
|
|
@@ -78,7 +78,7 @@ export default async function DealDetailPage({
|
|
| 78 |
</div>
|
| 79 |
</div>
|
| 80 |
|
| 81 |
-
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 pt-
|
| 82 |
<div>
|
| 83 |
<div className="text-sm text-text-muted mb-1">Price</div>
|
| 84 |
<div className="text-xl font-semibold">{formatPrice(deal)}</div>
|
|
@@ -115,16 +115,16 @@ export default async function DealDetailPage({
|
|
| 115 |
|
| 116 |
{/* Reported Terms */}
|
| 117 |
{deal.reportedTerms && (
|
| 118 |
-
<div className="card mb-
|
| 119 |
-
<h2 className="text-
|
| 120 |
-
<p className="text-
|
| 121 |
</div>
|
| 122 |
)}
|
| 123 |
|
| 124 |
{/* Deal Details & Compensation */}
|
| 125 |
-
<div className="grid md:grid-cols-2 gap-
|
| 126 |
<div className="card">
|
| 127 |
-
<h3 className="text-
|
| 128 |
<div className="space-y-2">
|
| 129 |
<div className="flex justify-between">
|
| 130 |
<span className="text-text-muted">Deal Type</span>
|
|
@@ -158,7 +158,7 @@ export default async function DealDetailPage({
|
|
| 158 |
</div>
|
| 159 |
|
| 160 |
<div className="card">
|
| 161 |
-
<h3 className="text-
|
| 162 |
<div className="space-y-2">
|
| 163 |
<div className="flex justify-between">
|
| 164 |
<span className="text-text-muted">Compensated</span>
|
|
@@ -190,15 +190,15 @@ export default async function DealDetailPage({
|
|
| 190 |
|
| 191 |
{/* Notes */}
|
| 192 |
{deal.notes && (
|
| 193 |
-
<div className="card mb-
|
| 194 |
-
<h3 className="text-
|
| 195 |
-
<p className="leading-relaxed">{deal.notes}</p>
|
| 196 |
</div>
|
| 197 |
)}
|
| 198 |
|
| 199 |
{/* Sources - Hyperlinked */}
|
| 200 |
-
<div className="card mb-
|
| 201 |
-
<h3 className="text-
|
| 202 |
<div className="space-y-2">
|
| 203 |
{deal.sourcePrimary && (
|
| 204 |
<div className="text-text-muted">
|
|
|
|
| 62 |
|
| 63 |
<div className="max-w-4xl">
|
| 64 |
{/* Header Card */}
|
| 65 |
+
<div className="card mb-6">
|
| 66 |
+
<div className="flex items-start justify-between mb-4">
|
| 67 |
<div>
|
| 68 |
+
<h1 className="text-2xl font-semibold mb-1">
|
| 69 |
{deal.provider} β {deal.buyer}
|
| 70 |
</h1>
|
| 71 |
+
<p className="text-text-muted text-sm">{deal.dataType}</p>
|
| 72 |
</div>
|
| 73 |
<div className="flex gap-2">
|
| 74 |
<span className="badge badge-secondary">{deal.modality}</span>
|
|
|
|
| 78 |
</div>
|
| 79 |
</div>
|
| 80 |
|
| 81 |
+
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 pt-4 border-t border-border">
|
| 82 |
<div>
|
| 83 |
<div className="text-sm text-text-muted mb-1">Price</div>
|
| 84 |
<div className="text-xl font-semibold">{formatPrice(deal)}</div>
|
|
|
|
| 115 |
|
| 116 |
{/* Reported Terms */}
|
| 117 |
{deal.reportedTerms && (
|
| 118 |
+
<div className="card mb-6">
|
| 119 |
+
<h2 className="text-lg font-semibold mb-3">Reported Terms</h2>
|
| 120 |
+
<p className="text-sm leading-relaxed">{deal.reportedTerms}</p>
|
| 121 |
</div>
|
| 122 |
)}
|
| 123 |
|
| 124 |
{/* Deal Details & Compensation */}
|
| 125 |
+
<div className="grid md:grid-cols-2 gap-4 mb-6">
|
| 126 |
<div className="card">
|
| 127 |
+
<h3 className="text-base font-semibold mb-3">Deal Details</h3>
|
| 128 |
<div className="space-y-2">
|
| 129 |
<div className="flex justify-between">
|
| 130 |
<span className="text-text-muted">Deal Type</span>
|
|
|
|
| 158 |
</div>
|
| 159 |
|
| 160 |
<div className="card">
|
| 161 |
+
<h3 className="text-base font-semibold mb-3">Creator Compensation</h3>
|
| 162 |
<div className="space-y-2">
|
| 163 |
<div className="flex justify-between">
|
| 164 |
<span className="text-text-muted">Compensated</span>
|
|
|
|
| 190 |
|
| 191 |
{/* Notes */}
|
| 192 |
{deal.notes && (
|
| 193 |
+
<div className="card mb-6">
|
| 194 |
+
<h3 className="text-base font-semibold mb-3">Notes</h3>
|
| 195 |
+
<p className="text-sm leading-relaxed">{deal.notes}</p>
|
| 196 |
</div>
|
| 197 |
)}
|
| 198 |
|
| 199 |
{/* Sources - Hyperlinked */}
|
| 200 |
+
<div className="card mb-6">
|
| 201 |
+
<h3 className="text-base font-semibold mb-3">Sources</h3>
|
| 202 |
<div className="space-y-2">
|
| 203 |
{deal.sourcePrimary && (
|
| 204 |
<div className="text-text-muted">
|
app/globals.css
CHANGED
|
@@ -120,21 +120,21 @@
|
|
| 120 |
}
|
| 121 |
|
| 122 |
@layer components {
|
| 123 |
-
/* Container - Max Width with
|
| 124 |
.container-content {
|
| 125 |
-
@apply px-
|
| 126 |
max-width: var(--max-width-content);
|
| 127 |
margin: 0 auto;
|
| 128 |
}
|
| 129 |
|
| 130 |
.container-narrow {
|
| 131 |
-
@apply mx-auto px-
|
| 132 |
max-width: var(--max-width-narrow);
|
| 133 |
}
|
| 134 |
|
| 135 |
-
/* Cards - Minimalist, Sharp */
|
| 136 |
.card {
|
| 137 |
-
@apply bg-surface border border-border rounded-
|
| 138 |
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.05);
|
| 139 |
}
|
| 140 |
|
|
@@ -147,9 +147,9 @@
|
|
| 147 |
transform: translateY(-1px);
|
| 148 |
}
|
| 149 |
|
| 150 |
-
/* Buttons - Minimalist, Sharp */
|
| 151 |
.btn {
|
| 152 |
-
@apply px-4 py-2 rounded-
|
| 153 |
@apply focus:outline-none focus:ring-2 focus:ring-offset-2;
|
| 154 |
}
|
| 155 |
|
|
@@ -177,11 +177,11 @@
|
|
| 177 |
|
| 178 |
.table th {
|
| 179 |
@apply text-left font-semibold text-xs text-text-muted uppercase tracking-wide;
|
| 180 |
-
@apply select-none;
|
| 181 |
}
|
| 182 |
|
| 183 |
.table td {
|
| 184 |
-
@apply border-b border-border-subtle;
|
| 185 |
}
|
| 186 |
|
| 187 |
.table tbody tr:hover {
|
|
@@ -192,9 +192,9 @@
|
|
| 192 |
@apply border-b-0;
|
| 193 |
}
|
| 194 |
|
| 195 |
-
/* Badges/Tags - Subtle, Sharp */
|
| 196 |
.badge {
|
| 197 |
-
@apply inline-flex items-center px-2.5 py-0.5 rounded-
|
| 198 |
}
|
| 199 |
|
| 200 |
.badge-primary {
|
|
@@ -206,9 +206,9 @@
|
|
| 206 |
@apply badge bg-border text-text-muted;
|
| 207 |
}
|
| 208 |
|
| 209 |
-
/* Inputs - Clean, Sharp */
|
| 210 |
.input {
|
| 211 |
-
@apply w-full px-4 py-2 border border-border rounded-
|
| 212 |
@apply bg-surface text-text;
|
| 213 |
@apply focus:outline-none focus:ring-2 focus:ring-accent focus:border-transparent;
|
| 214 |
@apply transition-all duration-200;
|
|
@@ -234,9 +234,9 @@
|
|
| 234 |
text-wrap: balance;
|
| 235 |
}
|
| 236 |
|
| 237 |
-
/* Spacing Utilities */
|
| 238 |
.section-padding {
|
| 239 |
-
@apply py-
|
| 240 |
}
|
| 241 |
|
| 242 |
/* Modal utilities */
|
|
@@ -260,7 +260,7 @@
|
|
| 260 |
|
| 261 |
.overflow-x-auto::-webkit-scrollbar-thumb {
|
| 262 |
background-color: var(--color-border);
|
| 263 |
-
border-radius:
|
| 264 |
}
|
| 265 |
|
| 266 |
.overflow-x-auto::-webkit-scrollbar-thumb:hover {
|
|
@@ -271,7 +271,7 @@
|
|
| 271 |
.timeline-container {
|
| 272 |
max-width: 1200px;
|
| 273 |
margin: 0 auto;
|
| 274 |
-
padding:
|
| 275 |
}
|
| 276 |
|
| 277 |
.timeline-header {
|
|
@@ -377,11 +377,11 @@
|
|
| 377 |
|
| 378 |
/* Progress bar - container and fill using CSS custom properties */
|
| 379 |
.progress-bar-container {
|
| 380 |
-
@apply w-full bg-border-subtle rounded-
|
| 381 |
}
|
| 382 |
|
| 383 |
.progress-bar-fill {
|
| 384 |
-
@apply bg-accent h-1.5 rounded-
|
| 385 |
width: calc(var(--progress-percentage, 0) * 1%);
|
| 386 |
}
|
| 387 |
|
|
|
|
| 120 |
}
|
| 121 |
|
| 122 |
@layer components {
|
| 123 |
+
/* Container - Max Width with Reduced Padding */
|
| 124 |
.container-content {
|
| 125 |
+
@apply px-2;
|
| 126 |
max-width: var(--max-width-content);
|
| 127 |
margin: 0 auto;
|
| 128 |
}
|
| 129 |
|
| 130 |
.container-narrow {
|
| 131 |
+
@apply mx-auto px-2;
|
| 132 |
max-width: var(--max-width-narrow);
|
| 133 |
}
|
| 134 |
|
| 135 |
+
/* Cards - Minimalist, Sharp, No Rounded Edges */
|
| 136 |
.card {
|
| 137 |
+
@apply bg-surface border border-border rounded-none p-4;
|
| 138 |
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.05);
|
| 139 |
}
|
| 140 |
|
|
|
|
| 147 |
transform: translateY(-1px);
|
| 148 |
}
|
| 149 |
|
| 150 |
+
/* Buttons - Minimalist, Sharp, No Rounded Edges */
|
| 151 |
.btn {
|
| 152 |
+
@apply px-4 py-2 rounded-none font-medium transition-all duration-200;
|
| 153 |
@apply focus:outline-none focus:ring-2 focus:ring-offset-2;
|
| 154 |
}
|
| 155 |
|
|
|
|
| 177 |
|
| 178 |
.table th {
|
| 179 |
@apply text-left font-semibold text-xs text-text-muted uppercase tracking-wide;
|
| 180 |
+
@apply select-none px-3 py-2;
|
| 181 |
}
|
| 182 |
|
| 183 |
.table td {
|
| 184 |
+
@apply border-b border-border-subtle px-3 py-2;
|
| 185 |
}
|
| 186 |
|
| 187 |
.table tbody tr:hover {
|
|
|
|
| 192 |
@apply border-b-0;
|
| 193 |
}
|
| 194 |
|
| 195 |
+
/* Badges/Tags - Subtle, Sharp, No Rounded Edges */
|
| 196 |
.badge {
|
| 197 |
+
@apply inline-flex items-center px-2.5 py-0.5 rounded-none text-xs font-medium;
|
| 198 |
}
|
| 199 |
|
| 200 |
.badge-primary {
|
|
|
|
| 206 |
@apply badge bg-border text-text-muted;
|
| 207 |
}
|
| 208 |
|
| 209 |
+
/* Inputs - Clean, Sharp, No Rounded Edges */
|
| 210 |
.input {
|
| 211 |
+
@apply w-full px-4 py-2 border border-border rounded-none;
|
| 212 |
@apply bg-surface text-text;
|
| 213 |
@apply focus:outline-none focus:ring-2 focus:ring-accent focus:border-transparent;
|
| 214 |
@apply transition-all duration-200;
|
|
|
|
| 234 |
text-wrap: balance;
|
| 235 |
}
|
| 236 |
|
| 237 |
+
/* Spacing Utilities - Reduced Padding */
|
| 238 |
.section-padding {
|
| 239 |
+
@apply py-4 md:py-6;
|
| 240 |
}
|
| 241 |
|
| 242 |
/* Modal utilities */
|
|
|
|
| 260 |
|
| 261 |
.overflow-x-auto::-webkit-scrollbar-thumb {
|
| 262 |
background-color: var(--color-border);
|
| 263 |
+
border-radius: 0;
|
| 264 |
}
|
| 265 |
|
| 266 |
.overflow-x-auto::-webkit-scrollbar-thumb:hover {
|
|
|
|
| 271 |
.timeline-container {
|
| 272 |
max-width: 1200px;
|
| 273 |
margin: 0 auto;
|
| 274 |
+
padding: 16px 8px;
|
| 275 |
}
|
| 276 |
|
| 277 |
.timeline-header {
|
|
|
|
| 377 |
|
| 378 |
/* Progress bar - container and fill using CSS custom properties */
|
| 379 |
.progress-bar-container {
|
| 380 |
+
@apply w-full bg-border-subtle rounded-none h-1.5 relative overflow-hidden;
|
| 381 |
}
|
| 382 |
|
| 383 |
.progress-bar-fill {
|
| 384 |
+
@apply bg-accent h-1.5 rounded-none absolute top-0 left-0 transition-all;
|
| 385 |
width: calc(var(--progress-percentage, 0) * 1%);
|
| 386 |
}
|
| 387 |
|
app/linkages/LinkagesClient.tsx
ADDED
|
@@ -0,0 +1,622 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client'
|
| 2 |
+
|
| 3 |
+
import { useState, useEffect } from 'react'
|
| 4 |
+
import React from 'react'
|
| 5 |
+
import Link from 'next/link'
|
| 6 |
+
import { formatDate } from '@/lib/utils'
|
| 7 |
+
import Tooltip from '@/app/components/Tooltip'
|
| 8 |
+
|
| 9 |
+
interface Linkage {
|
| 10 |
+
id: string
|
| 11 |
+
linkageType: string
|
| 12 |
+
linkageStrength: string
|
| 13 |
+
impactInference: string | null
|
| 14 |
+
deal: {
|
| 15 |
+
id: string
|
| 16 |
+
provider: string
|
| 17 |
+
buyer: string
|
| 18 |
+
modality: string
|
| 19 |
+
priceUsd: number | null
|
| 20 |
+
date: string | null
|
| 21 |
+
}
|
| 22 |
+
model: {
|
| 23 |
+
id: string
|
| 24 |
+
modelId: string
|
| 25 |
+
provider: string
|
| 26 |
+
family: string | null
|
| 27 |
+
tokensEstMid: number | null
|
| 28 |
+
}
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
interface LinkagesClientProps {
|
| 32 |
+
initialLinkages: Linkage[]
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
/**
 * Format a token count as a short human-readable string with a magnitude
 * suffix (P, T, B, M, K).
 *
 * @param value - Token count; `null`, `undefined`, `0` and `NaN` all mean
 *   "no estimate available". Counts are expected to be non-negative.
 * @returns A placeholder dash when there is no value; otherwise the count
 *   scaled to the largest fitting unit. P/T/B keep one decimal place, M and K
 *   are rounded to whole units, and counts under one thousand are shown as a
 *   rounded integer.
 */
function formatTokens(value: number | null | undefined): string {
  // Falsy covers null/undefined as well as 0 and NaN: nothing meaningful to show.
  if (!value) return '—'
  if (value >= 1e15) return `${(value / 1e15).toFixed(1)}P`
  if (value >= 1e12) return `${(value / 1e12).toFixed(1)}T`
  if (value >= 1e9) return `${(value / 1e9).toFixed(1)}B`
  if (value >= 1e6) return `${(value / 1e6).toFixed(0)}M`
  // Below one million the "M" tier would print a misleading "0M"/"1M",
  // so fall through to thousands and then to the plain integer.
  if (value >= 1e3) return `${(value / 1e3).toFixed(0)}K`
  return String(Math.round(value))
}
|
| 42 |
+
|
| 43 |
+
export default function LinkagesClient({ initialLinkages }: LinkagesClientProps) {
|
| 44 |
+
const [linkages] = useState<Linkage[]>(initialLinkages)
|
| 45 |
+
const [filters, setFilters] = useState({
|
| 46 |
+
linkageType: '',
|
| 47 |
+
linkageStrength: '',
|
| 48 |
+
provider: '',
|
| 49 |
+
buyer: '',
|
| 50 |
+
modelProvider: '',
|
| 51 |
+
modality: '',
|
| 52 |
+
})
|
| 53 |
+
const [searchQuery, setSearchQuery] = useState('')
|
| 54 |
+
const [sortBy, setSortBy] = useState<{ column: string; direction: 'asc' | 'desc' }>({
|
| 55 |
+
column: 'linkageStrength',
|
| 56 |
+
direction: 'desc',
|
| 57 |
+
})
|
| 58 |
+
const [groupBy, setGroupBy] = useState<string>('')
|
| 59 |
+
const [expandedGroups, setExpandedGroups] = useState<Set<string>>(new Set())
|
| 60 |
+
|
| 61 |
+
// Filter linkages
|
| 62 |
+
let filteredLinkages = linkages.filter(linkage => {
|
| 63 |
+
if (!linkage || !linkage.deal || !linkage.model) return false
|
| 64 |
+
|
| 65 |
+
if (filters.linkageType && linkage.linkageType !== filters.linkageType) return false
|
| 66 |
+
if (filters.linkageStrength && linkage.linkageStrength !== filters.linkageStrength) return false
|
| 67 |
+
if (filters.provider && !linkage.deal.provider.toLowerCase().includes(filters.provider.toLowerCase())) return false
|
| 68 |
+
if (filters.buyer && !linkage.deal.buyer.toLowerCase().includes(filters.buyer.toLowerCase())) return false
|
| 69 |
+
if (filters.modelProvider && !linkage.model.provider.toLowerCase().includes(filters.modelProvider.toLowerCase())) return false
|
| 70 |
+
if (filters.modality && linkage.deal.modality !== filters.modality) return false
|
| 71 |
+
|
| 72 |
+
if (searchQuery) {
|
| 73 |
+
const query = searchQuery.toLowerCase()
|
| 74 |
+
if (
|
| 75 |
+
!linkage.deal.provider.toLowerCase().includes(query) &&
|
| 76 |
+
!linkage.deal.buyer.toLowerCase().includes(query) &&
|
| 77 |
+
!linkage.model.modelId.toLowerCase().includes(query) &&
|
| 78 |
+
!linkage.model.provider.toLowerCase().includes(query) &&
|
| 79 |
+
!linkage.deal.modality.toLowerCase().includes(query)
|
| 80 |
+
) {
|
| 81 |
+
return false
|
| 82 |
+
}
|
| 83 |
+
}
|
| 84 |
+
return true
|
| 85 |
+
})
|
| 86 |
+
|
| 87 |
+
// Sort linkages
|
| 88 |
+
const sortedLinkages = [...filteredLinkages].sort((a, b) => {
|
| 89 |
+
const { column, direction } = sortBy
|
| 90 |
+
let comparison = 0
|
| 91 |
+
|
| 92 |
+
switch (column) {
|
| 93 |
+
case 'deal':
|
| 94 |
+
comparison = a.deal.provider.localeCompare(b.deal.provider)
|
| 95 |
+
break
|
| 96 |
+
case 'model':
|
| 97 |
+
comparison = a.model.modelId.localeCompare(b.model.modelId)
|
| 98 |
+
break
|
| 99 |
+
case 'linkageType':
|
| 100 |
+
comparison = a.linkageType.localeCompare(b.linkageType)
|
| 101 |
+
break
|
| 102 |
+
case 'linkageStrength':
|
| 103 |
+
const strengthOrder = { 'high': 3, 'medium': 2, 'low': 1 }
|
| 104 |
+
comparison = (strengthOrder[a.linkageStrength as keyof typeof strengthOrder] || 0) -
|
| 105 |
+
(strengthOrder[b.linkageStrength as keyof typeof strengthOrder] || 0)
|
| 106 |
+
break
|
| 107 |
+
case 'date':
|
| 108 |
+
const dateA = a.deal.date || ''
|
| 109 |
+
const dateB = b.deal.date || ''
|
| 110 |
+
comparison = dateA.localeCompare(dateB)
|
| 111 |
+
break
|
| 112 |
+
default:
|
| 113 |
+
comparison = 0
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
return direction === 'asc' ? comparison : -comparison
|
| 117 |
+
})
|
| 118 |
+
|
| 119 |
+
// Extract unique values for filters
|
| 120 |
+
const linkageTypes = Array.from(new Set(linkages.map(l => l.linkageType))).filter(Boolean).sort()
|
| 121 |
+
const linkageStrengths = Array.from(new Set(linkages.map(l => l.linkageStrength))).filter(Boolean).sort()
|
| 122 |
+
const providers = Array.from(new Set(linkages.map(l => l.deal.provider))).filter(Boolean).sort()
|
| 123 |
+
const buyers = Array.from(new Set(linkages.flatMap(l => l.deal.buyer.split(',').map(b => b.trim())))).filter(Boolean).sort()
|
| 124 |
+
const modelProviders = Array.from(new Set(linkages.map(l => l.model.provider))).filter(Boolean).sort()
|
| 125 |
+
const modalities = Array.from(new Set(linkages.map(l => l.deal.modality))).filter(Boolean).sort()
|
| 126 |
+
|
| 127 |
+
// Group linkages
|
| 128 |
+
/**
 * Bucket linkages under a display label derived from the chosen field.
 *
 * @param linkages - Linkages to group; their relative order is preserved
 *   inside each bucket.
 * @param groupByField - One of 'linkageType', 'linkageStrength', 'provider',
 *   'buyer', 'modelProvider' or 'modality'. An empty string disables grouping;
 *   any other value puts every linkage in a single 'All' bucket.
 * @returns A map from group label to the linkages in that group. With
 *   grouping disabled this is `{ All: linkages }` (the same array instance).
 *   Otherwise it is a prototype-less object, so read it with `Object.keys` /
 *   index access rather than inherited methods.
 */
function groupLinkages(linkages: Linkage[], groupByField: string): Record<string, Linkage[]> {
  if (!groupByField) {
    return { 'All': linkages }
  }

  // Labels come from free-form data (provider names, buyers, ...). On a plain
  // `{}` a label such as "constructor" would resolve to an inherited member,
  // look "already present", and make `.push` throw. A null-prototype object
  // has no inherited keys to collide with.
  const groups: Record<string, Linkage[]> = Object.create(null)

  linkages.forEach(linkage => {
    let groupKey = 'Unknown'

    switch (groupByField) {
      case 'linkageType':
        // Same friendly labels as the "Connection Type" filter options.
        groupKey = linkage.linkageType === 'temporal_overlap' ? 'Same Time Period' :
                   linkage.linkageType === 'inferred' ? 'Same Company' :
                   linkage.linkageType || 'Unknown'
        break
      case 'linkageStrength':
        groupKey = linkage.linkageStrength || 'Unknown'
        break
      case 'provider':
        groupKey = linkage.deal.provider || 'Unknown'
        break
      case 'buyer':
        groupKey = linkage.deal.buyer || 'Unknown'
        break
      case 'modelProvider':
        groupKey = linkage.model.provider || 'Unknown'
        break
      case 'modality':
        groupKey = linkage.deal.modality || 'Unknown'
        break
      default:
        groupKey = 'All'
    }

    if (!groups[groupKey]) {
      groups[groupKey] = []
    }
    groups[groupKey].push(linkage)
  })

  return groups
}
|
| 171 |
+
|
| 172 |
+
const groupedLinkages = groupLinkages(sortedLinkages, groupBy)
|
| 173 |
+
const groupKeys = Object.keys(groupedLinkages).sort((a, b) => {
|
| 174 |
+
// Sort by strength order if grouping by strength
|
| 175 |
+
if (groupBy === 'linkageStrength') {
|
| 176 |
+
const strengthOrder = { 'high': 3, 'medium': 2, 'low': 1 }
|
| 177 |
+
return (strengthOrder[b.toLowerCase() as keyof typeof strengthOrder] || 0) -
|
| 178 |
+
(strengthOrder[a.toLowerCase() as keyof typeof strengthOrder] || 0)
|
| 179 |
+
}
|
| 180 |
+
return a.localeCompare(b)
|
| 181 |
+
})
|
| 182 |
+
|
| 183 |
+
// Flip a single group between expanded and collapsed. A fresh Set is always
// returned so React sees a new state value; the previous Set is never mutated.
const toggleGroup = (groupKey: string) => {
  setExpandedGroups(current => {
    if (!current.has(groupKey)) {
      return new Set(current).add(groupKey)
    }
    const remaining = new Set(current)
    remaining.delete(groupKey)
    return remaining
  })
}
|
| 194 |
+
|
| 195 |
+
// Expand all groups by default
|
| 196 |
+
// Expand every group once, right after the grouping field changes.
//
// `groupKeys` is rebuilt on each render, so listing it as a dependency made
// this effect fire after every render. Combined with the old "nothing is
// expanded" check, that (a) instantly undid "Collapse All" and collapsing the
// last open group, and (b) looped forever when grouping was on but the
// filters matched nothing, because each `new Set([])` is a new state value.
// Depending on `groupBy` alone keeps the default-expanded behaviour without
// fighting the user's own collapse actions.
useEffect(() => {
  if (groupBy) {
    setExpandedGroups(new Set(groupKeys))
  }
  // groupKeys is intentionally read from the render that changed groupBy.
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [groupBy])
|
| 201 |
+
|
| 202 |
+
// Column-header click handler. Clicking the column that is already sorted
// ascending switches it to descending; every other click (a different column,
// or the same column currently descending) sorts ascending.
const handleSort = (column: string) => {
  setSortBy(previous => {
    const wasAscendingOnColumn = previous.column === column && previous.direction === 'asc'
    return {
      column,
      direction: wasAscendingOnColumn ? 'desc' : 'asc',
    }
  })
}
|
| 208 |
+
|
| 209 |
+
// Arrow glyph shown next to a column header: up for ascending, down for
// descending, and null when the table is not sorted by `column` (callers use
// the null to skip rendering the indicator). The two directions must be
// different glyphs; the previous text had degraded to one identical garbled
// character for both, which made the direction unreadable.
const getSortIndicator = (column: string) => {
  if (sortBy.column !== column) return null
  return sortBy.direction === 'asc' ? '↑' : '↓'
}
|
| 213 |
+
|
| 214 |
+
return (
|
| 215 |
+
<>
|
| 216 |
+
{/* Search and Filters */}
|
| 217 |
+
<div className="card mb-6 p-4">
|
| 218 |
+
<div className="mb-4">
|
| 219 |
+
<input
|
| 220 |
+
type="text"
|
| 221 |
+
placeholder="Search linkages..."
|
| 222 |
+
value={searchQuery}
|
| 223 |
+
onChange={(e) => setSearchQuery(e.target.value)}
|
| 224 |
+
className="input w-full text-sm"
|
| 225 |
+
/>
|
| 226 |
+
</div>
|
| 227 |
+
|
| 228 |
+
<div className="grid grid-cols-2 md:grid-cols-6 gap-3">
|
| 229 |
+
<div>
|
| 230 |
+
<label className="block text-xs font-medium text-text-muted mb-1.5 uppercase tracking-wide">Connection Type</label>
|
| 231 |
+
<select
|
| 232 |
+
value={filters.linkageType}
|
| 233 |
+
onChange={(e) => setFilters({ ...filters, linkageType: e.target.value })}
|
| 234 |
+
className="input text-sm py-1.5"
|
| 235 |
+
>
|
| 236 |
+
<option value="">All</option>
|
| 237 |
+
{linkageTypes.map(type => (
|
| 238 |
+
<option key={type} value={type}>
|
| 239 |
+
{type === 'temporal_overlap' ? 'Same Time Period' :
|
| 240 |
+
type === 'inferred' ? 'Same Company' : type}
|
| 241 |
+
</option>
|
| 242 |
+
))}
|
| 243 |
+
</select>
|
| 244 |
+
</div>
|
| 245 |
+
|
| 246 |
+
<div>
|
| 247 |
+
<label className="block text-xs font-medium text-text-muted mb-1.5 uppercase tracking-wide">Confidence</label>
|
| 248 |
+
<select
|
| 249 |
+
value={filters.linkageStrength}
|
| 250 |
+
onChange={(e) => setFilters({ ...filters, linkageStrength: e.target.value })}
|
| 251 |
+
className="input text-sm py-1.5"
|
| 252 |
+
>
|
| 253 |
+
<option value="">All</option>
|
| 254 |
+
{linkageStrengths.map(strength => (
|
| 255 |
+
<option key={strength} value={strength}>{strength.charAt(0).toUpperCase() + strength.slice(1)}</option>
|
| 256 |
+
))}
|
| 257 |
+
</select>
|
| 258 |
+
</div>
|
| 259 |
+
|
| 260 |
+
<div>
|
| 261 |
+
<label className="block text-xs font-medium text-text-muted mb-1.5 uppercase tracking-wide">Data Provider</label>
|
| 262 |
+
<select
|
| 263 |
+
value={filters.provider}
|
| 264 |
+
onChange={(e) => setFilters({ ...filters, provider: e.target.value })}
|
| 265 |
+
className="input text-sm py-1.5"
|
| 266 |
+
>
|
| 267 |
+
<option value="">All</option>
|
| 268 |
+
{providers.map(p => (
|
| 269 |
+
<option key={p} value={p}>{p}</option>
|
| 270 |
+
))}
|
| 271 |
+
</select>
|
| 272 |
+
</div>
|
| 273 |
+
|
| 274 |
+
<div>
|
| 275 |
+
<label className="block text-xs font-medium text-text-muted mb-1.5 uppercase tracking-wide">Buyer</label>
|
| 276 |
+
<select
|
| 277 |
+
value={filters.buyer}
|
| 278 |
+
onChange={(e) => setFilters({ ...filters, buyer: e.target.value })}
|
| 279 |
+
className="input text-sm py-1.5"
|
| 280 |
+
>
|
| 281 |
+
<option value="">All</option>
|
| 282 |
+
{buyers.map(b => (
|
| 283 |
+
<option key={b} value={b}>{b}</option>
|
| 284 |
+
))}
|
| 285 |
+
</select>
|
| 286 |
+
</div>
|
| 287 |
+
|
| 288 |
+
<div>
|
| 289 |
+
<label className="block text-xs font-medium text-text-muted mb-1.5 uppercase tracking-wide">Model Provider</label>
|
| 290 |
+
<select
|
| 291 |
+
value={filters.modelProvider}
|
| 292 |
+
onChange={(e) => setFilters({ ...filters, modelProvider: e.target.value })}
|
| 293 |
+
className="input text-sm py-1.5"
|
| 294 |
+
>
|
| 295 |
+
<option value="">All</option>
|
| 296 |
+
{modelProviders.map(p => (
|
| 297 |
+
<option key={p} value={p}>{p}</option>
|
| 298 |
+
))}
|
| 299 |
+
</select>
|
| 300 |
+
</div>
|
| 301 |
+
|
| 302 |
+
<div>
|
| 303 |
+
<label className="block text-xs font-medium text-text-muted mb-1.5 uppercase tracking-wide">Modality</label>
|
| 304 |
+
<select
|
| 305 |
+
value={filters.modality}
|
| 306 |
+
onChange={(e) => setFilters({ ...filters, modality: e.target.value })}
|
| 307 |
+
className="input text-sm py-1.5"
|
| 308 |
+
>
|
| 309 |
+
<option value="">All</option>
|
| 310 |
+
{modalities.map(m => (
|
| 311 |
+
<option key={m} value={m}>{m}</option>
|
| 312 |
+
))}
|
| 313 |
+
</select>
|
| 314 |
+
</div>
|
| 315 |
+
</div>
|
| 316 |
+
</div>
|
| 317 |
+
|
| 318 |
+
{/* Grouping and Results Count */}
|
| 319 |
+
<div className="mb-3 flex items-center justify-between flex-wrap gap-3">
|
| 320 |
+
<div className="flex items-center gap-3">
|
| 321 |
+
<div className="flex items-center gap-2">
|
| 322 |
+
<label className="text-xs font-medium text-text-muted uppercase tracking-wide">Group by:</label>
|
| 323 |
+
<select
|
| 324 |
+
value={groupBy}
|
| 325 |
+
onChange={(e) => {
|
| 326 |
+
setGroupBy(e.target.value)
|
| 327 |
+
setExpandedGroups(new Set())
|
| 328 |
+
}}
|
| 329 |
+
className="input text-sm py-1.5"
|
| 330 |
+
>
|
| 331 |
+
<option value="">None</option>
|
| 332 |
+
<option value="linkageType">Connection Type</option>
|
| 333 |
+
<option value="linkageStrength">Confidence</option>
|
| 334 |
+
<option value="provider">Data Provider</option>
|
| 335 |
+
<option value="buyer">Buyer</option>
|
| 336 |
+
<option value="modelProvider">Model Provider</option>
|
| 337 |
+
<option value="modality">Modality</option>
|
| 338 |
+
</select>
|
| 339 |
+
</div>
|
| 340 |
+
{groupBy && (
|
| 341 |
+
<button
|
| 342 |
+
onClick={() => {
|
| 343 |
+
setExpandedGroups(new Set(groupKeys))
|
| 344 |
+
}}
|
| 345 |
+
className="text-xs text-accent hover:text-accent-hover"
|
| 346 |
+
>
|
| 347 |
+
Expand All
|
| 348 |
+
</button>
|
| 349 |
+
)}
|
| 350 |
+
{groupBy && (
|
| 351 |
+
<button
|
| 352 |
+
onClick={() => {
|
| 353 |
+
setExpandedGroups(new Set())
|
| 354 |
+
}}
|
| 355 |
+
className="text-xs text-accent hover:text-accent-hover"
|
| 356 |
+
>
|
| 357 |
+
Collapse All
|
| 358 |
+
</button>
|
| 359 |
+
)}
|
| 360 |
+
</div>
|
| 361 |
+
<div className="text-sm text-text-muted">
|
| 362 |
+
Showing <span className="font-medium text-text">{sortedLinkages.length}</span> of <span className="font-medium text-text">{linkages.length}</span> linkages
|
| 363 |
+
</div>
|
| 364 |
+
</div>
|
| 365 |
+
|
| 366 |
+
{/* Results */}
|
| 367 |
+
<div className="card overflow-hidden p-0">
|
| 368 |
+
<div className="overflow-x-auto">
|
| 369 |
+
<table className="table text-sm">
|
| 370 |
+
<thead>
|
| 371 |
+
<tr className="bg-border-subtle">
|
| 372 |
+
<th
|
| 373 |
+
className="cursor-pointer hover:bg-border select-none"
|
| 374 |
+
onClick={() => handleSort('deal')}
|
| 375 |
+
title="Click to sort by deal"
|
| 376 |
+
>
|
| 377 |
+
<div className="flex items-center gap-2">
|
| 378 |
+
<Tooltip content="The training data deal, showing the data provider (who owns the data) and the buyer (the AI company licensing it).">
|
| 379 |
+
<span className="underline decoration-dotted cursor-help">Deal</span>
|
| 380 |
+
</Tooltip>
|
| 381 |
+
{getSortIndicator('deal') && (
|
| 382 |
+
<span className="text-text-muted text-xs">{getSortIndicator('deal')}</span>
|
| 383 |
+
)}
|
| 384 |
+
</div>
|
| 385 |
+
<div className="text-xs font-normal text-text-muted mt-0.5">Data provider β Buyer</div>
|
| 386 |
+
</th>
|
| 387 |
+
<th
|
| 388 |
+
className="cursor-pointer hover:bg-border select-none"
|
| 389 |
+
onClick={() => handleSort('model')}
|
| 390 |
+
title="Click to sort by model"
|
| 391 |
+
>
|
| 392 |
+
<div className="flex items-center gap-2">
|
| 393 |
+
<Tooltip content="The AI model that may have been trained using data from this deal. Linkages are inferred based on company matches and timing.">
|
| 394 |
+
<span className="underline decoration-dotted cursor-help">Model</span>
|
| 395 |
+
</Tooltip>
|
| 396 |
+
{getSortIndicator('model') && (
|
| 397 |
+
<span className="text-text-muted text-xs">{getSortIndicator('model')}</span>
|
| 398 |
+
)}
|
| 399 |
+
</div>
|
| 400 |
+
<div className="text-xs font-normal text-text-muted mt-0.5">AI model that may have used this data</div>
|
| 401 |
+
</th>
|
| 402 |
+
<th
|
| 403 |
+
className="cursor-pointer hover:bg-border select-none"
|
| 404 |
+
onClick={() => handleSort('linkageType')}
|
| 405 |
+
title="Click to sort by connection type"
|
| 406 |
+
>
|
| 407 |
+
<div className="flex items-center gap-2">
|
| 408 |
+
<Tooltip content="The type of connection: 'Same Time Period' (deal and model within 1 year), 'Same Company' (buyer matches model provider), or 'Explicit' (directly stated).">
|
| 409 |
+
<span className="underline decoration-dotted cursor-help">Connection Type</span>
|
| 410 |
+
</Tooltip>
|
| 411 |
+
{getSortIndicator('linkageType') && (
|
| 412 |
+
<span className="text-text-muted text-xs">{getSortIndicator('linkageType')}</span>
|
| 413 |
+
)}
|
| 414 |
+
</div>
|
| 415 |
+
<div className="text-xs font-normal text-text-muted mt-0.5">How the link was determined</div>
|
| 416 |
+
</th>
|
| 417 |
+
<th
|
| 418 |
+
className="cursor-pointer hover:bg-border select-none"
|
| 419 |
+
onClick={() => handleSort('linkageStrength')}
|
| 420 |
+
title="Click to sort by confidence"
|
| 421 |
+
>
|
| 422 |
+
<div className="flex items-center gap-2">
|
| 423 |
+
<Tooltip content="The confidence level in the linkage: High (strong evidence like temporal overlap), Medium (moderate evidence), or Low (weak evidence).">
|
| 424 |
+
<span className="underline decoration-dotted cursor-help">Confidence</span>
|
| 425 |
+
</Tooltip>
|
| 426 |
+
{getSortIndicator('linkageStrength') && (
|
| 427 |
+
<span className="text-text-muted text-xs">{getSortIndicator('linkageStrength')}</span>
|
| 428 |
+
)}
|
| 429 |
+
</div>
|
| 430 |
+
<div className="text-xs font-normal text-text-muted mt-0.5">How certain we are</div>
|
| 431 |
+
</th>
|
| 432 |
+
<th className="text-left font-semibold">
|
| 433 |
+
<Tooltip content="An interpretation of what this linkage means - how the deal's data may have impacted the model's training.">
|
| 434 |
+
<span className="underline decoration-dotted cursor-help">What This Means</span>
|
| 435 |
+
</Tooltip>
|
| 436 |
+
<div className="text-xs font-normal text-text-muted mt-0.5">Interpretation of the connection</div>
|
| 437 |
+
</th>
|
| 438 |
+
</tr>
|
| 439 |
+
</thead>
|
| 440 |
+
<tbody>
|
| 441 |
+
{sortedLinkages.length === 0 ? (
|
| 442 |
+
<tr>
|
| 443 |
+
<td colSpan={5} className="text-center py-12 text-text-muted">
|
| 444 |
+
No linkages found matching your filters
|
| 445 |
+
</td>
|
| 446 |
+
</tr>
|
| 447 |
+
) : groupBy ? (
|
| 448 |
+
// Grouped view
|
| 449 |
+
groupKeys.map((groupKey) => {
|
| 450 |
+
const groupLinkages = groupedLinkages[groupKey]
|
| 451 |
+
const isExpanded = expandedGroups.has(groupKey)
|
| 452 |
+
|
| 453 |
+
return (
|
| 454 |
+
<React.Fragment key={groupKey}>
|
| 455 |
+
<tr
|
| 456 |
+
onClick={() => toggleGroup(groupKey)}
|
| 457 |
+
className="cursor-pointer bg-border-subtle hover:bg-border transition-colors"
|
| 458 |
+
>
|
| 459 |
+
<td colSpan={5}>
|
| 460 |
+
<div className="flex items-center justify-between">
|
| 461 |
+
<div className="flex items-center gap-3">
|
| 462 |
+
<span className="text-xs text-text-muted">{isExpanded ? '▼' : '▶'}</span>
|
| 463 |
+
<span className="font-semibold text-sm">{groupKey}</span>
|
| 464 |
+
<span className="text-xs text-text-muted">
|
| 465 |
+
({groupLinkages.length} {groupLinkages.length === 1 ? 'linkage' : 'linkages'})
|
| 466 |
+
</span>
|
| 467 |
+
</div>
|
| 468 |
+
</div>
|
| 469 |
+
</td>
|
| 470 |
+
</tr>
|
| 471 |
+
{isExpanded && groupLinkages.map((linkage) => (
|
| 472 |
+
<tr
|
| 473 |
+
key={linkage.id}
|
| 474 |
+
className="transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 475 |
+
>
|
| 476 |
+
<td className="pl-6">
|
| 477 |
+
<Link
|
| 478 |
+
href={`/deals/${linkage.deal.id}`}
|
| 479 |
+
className="font-medium text-accent hover:text-accent-hover"
|
| 480 |
+
>
|
| 481 |
+
{linkage.deal.provider} → {linkage.deal.buyer}
|
| 482 |
+
</Link>
|
| 483 |
+
<div className="text-xs text-text-muted mt-0.5">
|
| 484 |
+
{linkage.deal.modality} • {linkage.deal.date ? formatDate(linkage.deal.date) : '—'}
|
| 485 |
+
</div>
|
| 486 |
+
</td>
|
| 487 |
+
<td>
|
| 488 |
+
<Link
|
| 489 |
+
href={`/models/${linkage.model.id}`}
|
| 490 |
+
className="font-medium text-accent hover:text-accent-hover"
|
| 491 |
+
>
|
| 492 |
+
{linkage.model.modelId}
|
| 493 |
+
</Link>
|
| 494 |
+
{linkage.model.family && (
|
| 495 |
+
<div className="text-xs text-text-muted mt-0.5">
|
| 496 |
+
{linkage.model.family} • {linkage.model.provider}
|
| 497 |
+
</div>
|
| 498 |
+
)}
|
| 499 |
+
{linkage.model.tokensEstMid && (
|
| 500 |
+
<div className="text-xs text-text-muted mt-0.5">
|
| 501 |
+
{formatTokens(linkage.model.tokensEstMid)} tokens
|
| 502 |
+
</div>
|
| 503 |
+
)}
|
| 504 |
+
</td>
|
| 505 |
+
<td>
|
| 506 |
+
<div className="flex flex-col gap-1">
|
| 507 |
+
<span className="badge badge-secondary text-xs">
|
| 508 |
+
{linkage.linkageType === 'temporal_overlap' ? 'Same Time Period' :
|
| 509 |
+
linkage.linkageType === 'inferred' ? 'Same Company' :
|
| 510 |
+
linkage.linkageType || '—'}
|
| 511 |
+
</span>
|
| 512 |
+
<div className="text-xs text-text-muted/70">
|
| 513 |
+
{linkage.linkageType === 'temporal_overlap'
|
| 514 |
+
? 'Deal & model within 1 year'
|
| 515 |
+
: linkage.linkageType === 'inferred'
|
| 516 |
+
? 'Buyer matches model provider'
|
| 517 |
+
: ''}
|
| 518 |
+
</div>
|
| 519 |
+
</div>
|
| 520 |
+
</td>
|
| 521 |
+
<td>
|
| 522 |
+
<span className={`badge ${
|
| 523 |
+
linkage.linkageStrength === 'high'
|
| 524 |
+
? 'badge-primary'
|
| 525 |
+
: 'badge-secondary'
|
| 526 |
+
} text-xs`}>
|
| 527 |
+
{linkage.linkageStrength === 'high' ? 'High' :
|
| 528 |
+
linkage.linkageStrength === 'medium' ? 'Medium' :
|
| 529 |
+
linkage.linkageStrength === 'low' ? 'Low' :
|
| 530 |
+
'—'}
|
| 531 |
+
</span>
|
| 532 |
+
</td>
|
| 533 |
+
<td>
|
| 534 |
+
<div className="text-sm text-text leading-relaxed">
|
| 535 |
+
{linkage.impactInference || '—'}
|
| 536 |
+
</div>
|
| 537 |
+
</td>
|
| 538 |
+
</tr>
|
| 539 |
+
))}
|
| 540 |
+
</React.Fragment>
|
| 541 |
+
)
|
| 542 |
+
})
|
| 543 |
+
) : (
|
| 544 |
+
// Ungrouped view
|
| 545 |
+
sortedLinkages.map((linkage) => (
|
| 546 |
+
<tr
|
| 547 |
+
key={linkage.id}
|
| 548 |
+
className="transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 549 |
+
>
|
| 550 |
+
<td>
|
| 551 |
+
<Link
|
| 552 |
+
href={`/deals/${linkage.deal.id}`}
|
| 553 |
+
className="font-medium text-accent hover:text-accent-hover"
|
| 554 |
+
>
|
| 555 |
+
{linkage.deal.provider} → {linkage.deal.buyer}
|
| 556 |
+
</Link>
|
| 557 |
+
<div className="text-xs text-text-muted mt-0.5">
|
| 558 |
+
{linkage.deal.modality} • {linkage.deal.date ? formatDate(linkage.deal.date) : '—'}
|
| 559 |
+
</div>
|
| 560 |
+
</td>
|
| 561 |
+
<td>
|
| 562 |
+
<Link
|
| 563 |
+
href={`/models/${linkage.model.id}`}
|
| 564 |
+
className="font-medium text-accent hover:text-accent-hover"
|
| 565 |
+
>
|
| 566 |
+
{linkage.model.modelId}
|
| 567 |
+
</Link>
|
| 568 |
+
{linkage.model.family && (
|
| 569 |
+
<div className="text-xs text-text-muted mt-0.5">
|
| 570 |
+
{linkage.model.family} • {linkage.model.provider}
|
| 571 |
+
</div>
|
| 572 |
+
)}
|
| 573 |
+
{linkage.model.tokensEstMid && (
|
| 574 |
+
<div className="text-xs text-text-muted mt-0.5">
|
| 575 |
+
{formatTokens(linkage.model.tokensEstMid)} tokens
|
| 576 |
+
</div>
|
| 577 |
+
)}
|
| 578 |
+
</td>
|
| 579 |
+
<td>
|
| 580 |
+
<div className="flex flex-col gap-1">
|
| 581 |
+
<span className="badge badge-secondary text-xs">
|
| 582 |
+
{linkage.linkageType === 'temporal_overlap' ? 'Same Time Period' :
|
| 583 |
+
linkage.linkageType === 'inferred' ? 'Same Company' :
|
| 584 |
+
linkage.linkageType || '—'}
|
| 585 |
+
</span>
|
| 586 |
+
<div className="text-xs text-text-muted/70">
|
| 587 |
+
{linkage.linkageType === 'temporal_overlap'
|
| 588 |
+
? 'Deal & model within 1 year'
|
| 589 |
+
: linkage.linkageType === 'inferred'
|
| 590 |
+
? 'Buyer matches model provider'
|
| 591 |
+
: ''}
|
| 592 |
+
</div>
|
| 593 |
+
</div>
|
| 594 |
+
</td>
|
| 595 |
+
<td>
|
| 596 |
+
<span className={`badge ${
|
| 597 |
+
linkage.linkageStrength === 'high'
|
| 598 |
+
? 'badge-primary'
|
| 599 |
+
: 'badge-secondary'
|
| 600 |
+
} text-xs`}>
|
| 601 |
+
{linkage.linkageStrength === 'high' ? 'High' :
|
| 602 |
+
linkage.linkageStrength === 'medium' ? 'Medium' :
|
| 603 |
+
linkage.linkageStrength === 'low' ? 'Low' :
|
| 604 |
+
'—'}
|
| 605 |
+
</span>
|
| 606 |
+
</td>
|
| 607 |
+
<td>
|
| 608 |
+
<div className="text-sm text-text leading-relaxed">
|
| 609 |
+
{linkage.impactInference || '—'}
|
| 610 |
+
</div>
|
| 611 |
+
</td>
|
| 612 |
+
</tr>
|
| 613 |
+
))
|
| 614 |
+
)}
|
| 615 |
+
</tbody>
|
| 616 |
+
</table>
|
| 617 |
+
</div>
|
| 618 |
+
</div>
|
| 619 |
+
</>
|
| 620 |
+
)
|
| 621 |
+
}
|
| 622 |
+
|
app/linkages/page.tsx
CHANGED
|
@@ -1,69 +1,45 @@
|
|
| 1 |
import { prisma } from '@/lib/prisma'
|
| 2 |
-
import Link from 'next/link'
|
| 3 |
-
import { formatDate } from '@/lib/utils'
|
| 4 |
import AutoCreate from '@/app/components/linkages/AutoCreate'
|
| 5 |
-
import
|
| 6 |
|
| 7 |
async function getLinkages() {
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
priceUsd: true,
|
| 19 |
-
date: true,
|
| 20 |
-
},
|
| 21 |
},
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
},
|
| 31 |
},
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
console.error('Error fetching linkages:', error)
|
| 40 |
-
// Return empty array on error to prevent page crash
|
| 41 |
-
return []
|
| 42 |
-
}
|
| 43 |
}
|
| 44 |
|
| 45 |
-
function formatTokens(value: number | null | undefined): string {
|
| 46 |
-
if (!value) return '—'
|
| 47 |
-
if (value >= 1e15) return `${(value / 1e15).toFixed(1)}P`
|
| 48 |
-
if (value >= 1e12) return `${(value / 1e12).toFixed(1)}T`
|
| 49 |
-
if (value >= 1e9) return `${(value / 1e9).toFixed(1)}B`
|
| 50 |
-
return `${(value / 1e6).toFixed(0)}M`
|
| 51 |
-
}
|
| 52 |
|
| 53 |
async function getDealCount() {
|
| 54 |
-
|
| 55 |
-
return await prisma.deal.count()
|
| 56 |
-
} catch {
|
| 57 |
-
return 0
|
| 58 |
-
}
|
| 59 |
}
|
| 60 |
|
| 61 |
async function getModelCount() {
|
| 62 |
-
|
| 63 |
-
return await prisma.modelRegistry.count()
|
| 64 |
-
} catch {
|
| 65 |
-
return 0
|
| 66 |
-
}
|
| 67 |
}
|
| 68 |
|
| 69 |
export default async function LinkagesPage() {
|
|
@@ -82,196 +58,68 @@ export default async function LinkagesPage() {
|
|
| 82 |
/>
|
| 83 |
|
| 84 |
{/* Header */}
|
| 85 |
-
<div className="mb-
|
| 86 |
-
<
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
</p>
|
| 91 |
-
|
| 92 |
-
{/* Simple Explanation */}
|
| 93 |
-
<div className="card bg-[rgba(139,111,71,0.05)] border border-accent/20">
|
| 94 |
-
<div className="p-4">
|
| 95 |
-
<h3 className="font-semibold text-text mb-2">What are linkages?</h3>
|
| 96 |
-
<p className="text-sm text-text-muted leading-relaxed">
|
| 97 |
-
Linkages connect training data deals to AI models. For example: if OpenAI signed a deal with News Corp in 2023,
|
| 98 |
-
and GPT-4 was released in 2023, there's a linkage suggesting the News Corp data may have been used to train GPT-4.
|
| 99 |
-
</p>
|
| 100 |
-
<p className="text-sm text-text-muted leading-relaxed mt-2">
|
| 101 |
-
The system automatically creates linkages when: (1) the deal buyer matches the model provider (e.g., OpenAI deal → OpenAI model),
|
| 102 |
-
and (2) optionally, when the deal date and model release date are close in time (within 1 year).
|
| 103 |
-
</p>
|
| 104 |
-
</div>
|
| 105 |
-
</div>
|
| 106 |
-
</div>
|
| 107 |
</div>
|
| 108 |
|
| 109 |
-
|
| 110 |
-
{/* Linkages Table */}
|
| 111 |
-
<div className="card overflow-hidden p-0">
|
| 112 |
-
<div className="overflow-x-auto">
|
| 113 |
-
<table className="table text-sm">
|
| 114 |
-
<thead>
|
| 115 |
-
<tr className="border-b border-border-subtle">
|
| 116 |
-
<th className="px-4 py-3 text-left font-semibold">
|
| 117 |
-
<Tooltip content="The training data deal, showing the data provider (who owns the data) and the buyer (the AI company licensing it).">
|
| 118 |
-
<div className="underline decoration-dotted cursor-help">Deal</div>
|
| 119 |
-
</Tooltip>
|
| 120 |
-
<div className="text-xs font-normal text-text-muted mt-0.5">Data provider → Buyer</div>
|
| 121 |
-
</th>
|
| 122 |
-
<th className="px-4 py-3 text-left font-semibold">
|
| 123 |
-
<Tooltip content="The AI model that may have been trained using data from this deal. Linkages are inferred based on company matches and timing.">
|
| 124 |
-
<div className="underline decoration-dotted cursor-help">Model</div>
|
| 125 |
-
</Tooltip>
|
| 126 |
-
<div className="text-xs font-normal text-text-muted mt-0.5">AI model that may have used this data</div>
|
| 127 |
-
</th>
|
| 128 |
-
<th className="px-4 py-3 text-left font-semibold">
|
| 129 |
-
<Tooltip content="The type of connection: 'Same Time Period' (deal and model within 1 year), 'Same Company' (buyer matches model provider), or 'Explicit' (directly stated).">
|
| 130 |
-
<div className="underline decoration-dotted cursor-help">Connection Type</div>
|
| 131 |
-
</Tooltip>
|
| 132 |
-
<div className="text-xs font-normal text-text-muted mt-0.5">How the link was determined</div>
|
| 133 |
-
</th>
|
| 134 |
-
<th className="px-4 py-3 text-left font-semibold">
|
| 135 |
-
<Tooltip content="The confidence level in the linkage: High (strong evidence like temporal overlap), Medium (moderate evidence), or Low (weak evidence).">
|
| 136 |
-
<div className="underline decoration-dotted cursor-help">Confidence</div>
|
| 137 |
-
</Tooltip>
|
| 138 |
-
<div className="text-xs font-normal text-text-muted mt-0.5">How certain we are</div>
|
| 139 |
-
</th>
|
| 140 |
-
<th className="px-4 py-3 text-left font-semibold">
|
| 141 |
-
<Tooltip content="An interpretation of what this linkage means - how the deal's data may have impacted the model's training.">
|
| 142 |
-
<div className="underline decoration-dotted cursor-help">What This Means</div>
|
| 143 |
-
</Tooltip>
|
| 144 |
-
<div className="text-xs font-normal text-text-muted mt-0.5">Interpretation of the connection</div>
|
| 145 |
-
</th>
|
| 146 |
-
</tr>
|
| 147 |
-
</thead>
|
| 148 |
-
<tbody>
|
| 149 |
-
{linkages.length === 0 ? (
|
| 150 |
-
<tr>
|
| 151 |
-
<td colSpan={5} className="text-center py-12 text-text-muted">
|
| 152 |
-
{dealCount === 0 || modelCount === 0
|
| 153 |
-
? `No ${dealCount === 0 ? 'deals' : 'models'} found. Please seed the database first.`
|
| 154 |
-
: 'No linkages found. Linkage creation will start automatically.'}
|
| 155 |
-
</td>
|
| 156 |
-
</tr>
|
| 157 |
-
) : (
|
| 158 |
-
linkages
|
| 159 |
-
.filter(linkage => linkage && linkage.deal && linkage.model)
|
| 160 |
-
.map((linkage) => (
|
| 161 |
-
<tr
|
| 162 |
-
key={linkage.id}
|
| 163 |
-
className="transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 164 |
-
>
|
| 165 |
-
<td className="px-4 py-3">
|
| 166 |
-
<Link
|
| 167 |
-
href={`/deals/${linkage.deal.id}`}
|
| 168 |
-
className="font-medium text-accent hover:text-accent-hover"
|
| 169 |
-
>
|
| 170 |
-
{linkage.deal.provider} → {linkage.deal.buyer}
|
| 171 |
-
</Link>
|
| 172 |
-
<div className="text-xs text-text-muted mt-0.5">
|
| 173 |
-
{linkage.deal.modality} • {linkage.deal.date ? formatDate(linkage.deal.date) : '—'}
|
| 174 |
-
</div>
|
| 175 |
-
</td>
|
| 176 |
-
<td className="px-4 py-3">
|
| 177 |
-
<Link
|
| 178 |
-
href={`/models/${linkage.model.id}`}
|
| 179 |
-
className="font-medium text-accent hover:text-accent-hover"
|
| 180 |
-
>
|
| 181 |
-
{linkage.model.modelId}
|
| 182 |
-
</Link>
|
| 183 |
-
{linkage.model.family && (
|
| 184 |
-
<div className="text-xs text-text-muted mt-0.5">
|
| 185 |
-
{linkage.model.family} • {linkage.model.provider}
|
| 186 |
-
</div>
|
| 187 |
-
)}
|
| 188 |
-
{linkage.model.tokensEstMid && (
|
| 189 |
-
<div className="text-xs text-text-muted mt-0.5">
|
| 190 |
-
{formatTokens(linkage.model.tokensEstMid)} tokens
|
| 191 |
-
</div>
|
| 192 |
-
)}
|
| 193 |
-
</td>
|
| 194 |
-
<td className="px-4 py-3">
|
| 195 |
-
<div className="flex flex-col gap-1">
|
| 196 |
-
<span className="badge badge-secondary text-xs">
|
| 197 |
-
{linkage.linkageType === 'temporal_overlap' ? 'Same Time Period' :
|
| 198 |
-
linkage.linkageType === 'inferred' ? 'Same Company' :
|
| 199 |
-
linkage.linkageType || '—'}
|
| 200 |
-
</span>
|
| 201 |
-
<div className="text-xs text-text-muted/70">
|
| 202 |
-
{linkage.linkageType === 'temporal_overlap'
|
| 203 |
-
? 'Deal & model within 1 year'
|
| 204 |
-
: linkage.linkageType === 'inferred'
|
| 205 |
-
? 'Buyer matches model provider'
|
| 206 |
-
: ''}
|
| 207 |
-
</div>
|
| 208 |
-
</div>
|
| 209 |
-
</td>
|
| 210 |
-
<td className="px-4 py-3">
|
| 211 |
-
<span className={`badge ${
|
| 212 |
-
linkage.linkageStrength === 'high'
|
| 213 |
-
? 'badge-primary'
|
| 214 |
-
: 'badge-secondary'
|
| 215 |
-
} text-xs`}>
|
| 216 |
-
{linkage.linkageStrength === 'high' ? 'High' :
|
| 217 |
-
linkage.linkageStrength === 'medium' ? 'Medium' :
|
| 218 |
-
linkage.linkageStrength === 'low' ? 'Low' :
|
| 219 |
-
'—'}
|
| 220 |
-
</span>
|
| 221 |
-
</td>
|
| 222 |
-
<td className="px-4 py-3">
|
| 223 |
-
<div className="text-sm text-text leading-relaxed">
|
| 224 |
-
{linkage.impactInference || '—'}
|
| 225 |
-
</div>
|
| 226 |
-
</td>
|
| 227 |
-
</tr>
|
| 228 |
-
))
|
| 229 |
-
)}
|
| 230 |
-
</tbody>
|
| 231 |
-
</table>
|
| 232 |
-
</div>
|
| 233 |
-
</div>
|
| 234 |
-
|
| 235 |
-
{/* Detailed Explanation */}
|
| 236 |
{linkages.length > 0 && (
|
| 237 |
-
<div className="card
|
| 238 |
-
<
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
<
|
| 242 |
-
<
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
</
|
| 256 |
-
<
|
| 257 |
-
<
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
</div>
|
| 272 |
</div>
|
|
|
|
|
|
|
| 273 |
)}
|
| 274 |
-
</div>
|
| 275 |
-
</main>
|
| 276 |
-
)
|
| 277 |
-
}
|
|
|
|
| 1 |
import { prisma } from '@/lib/prisma'
|
|
|
|
|
|
|
| 2 |
import AutoCreate from '@/app/components/linkages/AutoCreate'
|
| 3 |
+
import LinkagesClient from './LinkagesClient'
|
| 4 |
|
| 5 |
async function getLinkages() {
|
| 6 |
+
const linkages = await prisma.dealModelLinkage.findMany({
|
| 7 |
+
include: {
|
| 8 |
+
deal: {
|
| 9 |
+
select: {
|
| 10 |
+
id: true,
|
| 11 |
+
provider: true,
|
| 12 |
+
buyer: true,
|
| 13 |
+
modality: true,
|
| 14 |
+
priceUsd: true,
|
| 15 |
+
date: true,
|
|
|
|
|
|
|
|
|
|
| 16 |
},
|
| 17 |
+
},
|
| 18 |
+
model: {
|
| 19 |
+
select: {
|
| 20 |
+
id: true,
|
| 21 |
+
modelId: true,
|
| 22 |
+
provider: true,
|
| 23 |
+
family: true,
|
| 24 |
+
tokensEstMid: true,
|
| 25 |
},
|
| 26 |
},
|
| 27 |
+
},
|
| 28 |
+
orderBy: [
|
| 29 |
+
{ linkageStrength: 'desc' },
|
| 30 |
+
{ analysisTimestamp: 'desc' },
|
| 31 |
+
],
|
| 32 |
+
})
|
| 33 |
+
return linkages
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
async function getDealCount() {
|
| 38 |
+
return await prisma.deal.count()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
}
|
| 40 |
|
| 41 |
async function getModelCount() {
|
| 42 |
+
return await prisma.modelRegistry.count()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
}
|
| 44 |
|
| 45 |
export default async function LinkagesPage() {
|
|
|
|
| 58 |
/>
|
| 59 |
|
| 60 |
{/* Header */}
|
| 61 |
+
<div className="mb-4">
|
| 62 |
+
<h1 className="text-3xl font-semibold mb-1">Deal-Model Linkages</h1>
|
| 63 |
+
<p className="text-text-muted text-sm">
|
| 64 |
+
Connections between training data deals and AI models
|
| 65 |
+
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
</div>
|
| 67 |
|
| 68 |
+
{/* Detailed Explanation - Above table, collapsible */}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
{linkages.length > 0 && (
|
| 70 |
+
<div className="card mb-6">
|
| 71 |
+
<details className="group">
|
| 72 |
+
<summary className="cursor-pointer list-none flex items-center justify-between">
|
| 73 |
+
<h2 className="text-lg font-semibold">How Linkages Work</h2>
|
| 74 |
+
<span className="text-text-muted text-sm group-open:hidden">Click to expand</span>
|
| 75 |
+
<span className="text-text-muted text-sm hidden group-open:inline">Click to collapse</span>
|
| 76 |
+
</summary>
|
| 77 |
+
<div className="mt-3 pt-3 border-t border-border-subtle space-y-3 text-sm text-text-muted">
|
| 78 |
+
<div>
|
| 79 |
+
<h3 className="font-semibold text-text mb-2">Example:</h3>
|
| 80 |
+
<p className="leading-relaxed">
|
| 81 |
+
If you see a linkage: <strong className="text-text">News Corp → OpenAI</strong> connected to <strong className="text-text">GPT-4</strong>,
|
| 82 |
+
it means OpenAI signed a deal with News Corp, and because GPT-4 is an OpenAI model, there's a potential connection.
|
| 83 |
+
If the deal happened in 2023 and GPT-4 was released in 2023, that's a stronger connection (temporal overlap).
|
| 84 |
+
</p>
|
| 85 |
+
</div>
|
| 86 |
+
|
| 87 |
+
<div className="pt-2 border-t border-border-subtle">
|
| 88 |
+
<h3 className="font-semibold text-text mb-2">Connection Types:</h3>
|
| 89 |
+
<ul className="space-y-2">
|
| 90 |
+
<li>
|
| 91 |
+
<strong className="text-text">Same Time Period:</strong> Deal and model release are within 1 year.
|
| 92 |
+
Suggests the deal's data may have been used in training.
|
| 93 |
+
</li>
|
| 94 |
+
<li>
|
| 95 |
+
<strong className="text-text">Same Company:</strong> Deal buyer matches model provider, but different time periods.
|
| 96 |
+
Shows organizational relationship but less direct connection.
|
| 97 |
+
</li>
|
| 98 |
+
</ul>
|
| 99 |
+
</div>
|
| 100 |
+
|
| 101 |
+
<div className="pt-2 border-t border-border-subtle">
|
| 102 |
+
<h3 className="font-semibold text-text mb-2">Confidence Levels:</h3>
|
| 103 |
+
<p className="leading-relaxed">
|
| 104 |
+
Currently all linkages are marked as <strong className="text-text">High</strong> confidence because they require
|
| 105 |
+
a clear match between the deal buyer and model provider. The system automatically creates these connections
|
| 106 |
+
when it finds matching company names (e.g., "OpenAI" in both the deal and model).
|
| 107 |
+
</p>
|
| 108 |
+
</div>
|
| 109 |
</div>
|
| 110 |
+
</details>
|
| 111 |
+
</div>
|
| 112 |
+
)}
|
| 113 |
+
|
| 114 |
+
{/* Linkages Table with Filtering, Sorting, and Grouping */}
|
| 115 |
+
{linkages.length === 0 ? (
|
| 116 |
+
<div className="card">
|
| 117 |
+
<div className="text-center py-12 text-text-muted">
|
| 118 |
+
{dealCount === 0 || modelCount === 0
|
| 119 |
+
? `No ${dealCount === 0 ? 'deals' : 'models'} found. Please seed the database first.`
|
| 120 |
+
: 'No linkages found. Linkage creation will start automatically.'}
|
| 121 |
</div>
|
| 122 |
</div>
|
| 123 |
+
) : (
|
| 124 |
+
<LinkagesClient initialLinkages={linkages.filter(l => l && l.deal && l.model)} />
|
| 125 |
)}
|
|
|
|
|
|
|
|
|
|
|
|
app/models/[id]/page.tsx
CHANGED
|
@@ -91,15 +91,15 @@ export default async function ModelDetailPage({
|
|
| 91 |
|
| 92 |
<div className="max-w-4xl">
|
| 93 |
{/* Header Card */}
|
| 94 |
-
<div className="card mb-
|
| 95 |
-
<div className="mb-
|
| 96 |
-
<h1 className="text-
|
| 97 |
{model.family && (
|
| 98 |
-
<p className="text-text-muted text-
|
| 99 |
)}
|
| 100 |
</div>
|
| 101 |
|
| 102 |
-
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 pt-
|
| 103 |
<div>
|
| 104 |
<div className="text-sm text-text-muted mb-1">Provider</div>
|
| 105 |
<div className="font-medium">{model.provider}</div>
|
|
@@ -137,9 +137,9 @@ export default async function ModelDetailPage({
|
|
| 137 |
|
| 138 |
{/* Token Estimates */}
|
| 139 |
{(model.tokensEstMin || model.tokensEstMax || model.tokensEstMid) && (
|
| 140 |
-
<div className="mb-
|
| 141 |
-
<div className="card mb-
|
| 142 |
-
<h2 className="text-
|
| 143 |
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
|
| 144 |
<div>
|
| 145 |
<div className="text-sm text-text-muted mb-1">Minimum</div>
|
|
@@ -180,8 +180,8 @@ export default async function ModelDetailPage({
|
|
| 180 |
|
| 181 |
{/* Evidence Profile */}
|
| 182 |
{(evidenceTypes.length > 0 || model.evidenceStrength || uncertaintySources.length > 0) && (
|
| 183 |
-
<div className="card mb-
|
| 184 |
-
<h2 className="text-
|
| 185 |
<div className="space-y-4">
|
| 186 |
{model.evidenceStrength && (
|
| 187 |
<div>
|
|
@@ -221,8 +221,8 @@ export default async function ModelDetailPage({
|
|
| 221 |
|
| 222 |
{/* Training Compute */}
|
| 223 |
{(model.flopsReported || model.flopsEstimated) && (
|
| 224 |
-
<div className="card mb-
|
| 225 |
-
<h2 className="text-
|
| 226 |
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
| 227 |
{model.flopsReported && (
|
| 228 |
<div>
|
|
@@ -242,13 +242,13 @@ export default async function ModelDetailPage({
|
|
| 242 |
|
| 243 |
{/* Linked Deals */}
|
| 244 |
{model.modelLinkages && model.modelLinkages.length > 0 && (
|
| 245 |
-
<div className="card mb-
|
| 246 |
-
<h2 className="text-
|
| 247 |
<div className="space-y-3">
|
| 248 |
{model.modelLinkages.map((linkage) => (
|
| 249 |
<div
|
| 250 |
key={linkage.id}
|
| 251 |
-
className="p-4 border border-border-subtle rounded-
|
| 252 |
>
|
| 253 |
<div className="flex items-start justify-between mb-2">
|
| 254 |
<div>
|
|
@@ -285,7 +285,7 @@ export default async function ModelDetailPage({
|
|
| 285 |
{/* Sources */}
|
| 286 |
{sources.length > 0 && (
|
| 287 |
<div className="card">
|
| 288 |
-
<h2 className="text-
|
| 289 |
<div className="space-y-2">
|
| 290 |
{sources.map((source, idx) => (
|
| 291 |
<div key={idx}>
|
|
|
|
| 91 |
|
| 92 |
<div className="max-w-4xl">
|
| 93 |
{/* Header Card */}
|
| 94 |
+
<div className="card mb-6">
|
| 95 |
+
<div className="mb-3">
|
| 96 |
+
<h1 className="text-2xl font-semibold mb-1">{model.modelId}</h1>
|
| 97 |
{model.family && (
|
| 98 |
+
<p className="text-text-muted text-sm">{model.family}</p>
|
| 99 |
)}
|
| 100 |
</div>
|
| 101 |
|
| 102 |
+
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 pt-4 border-t border-border">
|
| 103 |
<div>
|
| 104 |
<div className="text-sm text-text-muted mb-1">Provider</div>
|
| 105 |
<div className="font-medium">{model.provider}</div>
|
|
|
|
| 137 |
|
| 138 |
{/* Token Estimates */}
|
| 139 |
{(model.tokensEstMin || model.tokensEstMax || model.tokensEstMid) && (
|
| 140 |
+
<div className="mb-6">
|
| 141 |
+
<div className="card mb-4">
|
| 142 |
+
<h2 className="text-lg font-semibold mb-3">Training Token Estimates</h2>
|
| 143 |
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
|
| 144 |
<div>
|
| 145 |
<div className="text-sm text-text-muted mb-1">Minimum</div>
|
|
|
|
| 180 |
|
| 181 |
{/* Evidence Profile */}
|
| 182 |
{(evidenceTypes.length > 0 || model.evidenceStrength || uncertaintySources.length > 0) && (
|
| 183 |
+
<div className="card mb-6">
|
| 184 |
+
<h2 className="text-lg font-semibold mb-3">Evidence Profile</h2>
|
| 185 |
<div className="space-y-4">
|
| 186 |
{model.evidenceStrength && (
|
| 187 |
<div>
|
|
|
|
| 221 |
|
| 222 |
{/* Training Compute */}
|
| 223 |
{(model.flopsReported || model.flopsEstimated) && (
|
| 224 |
+
<div className="card mb-6">
|
| 225 |
+
<h2 className="text-lg font-semibold mb-3">Training Compute</h2>
|
| 226 |
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
|
| 227 |
{model.flopsReported && (
|
| 228 |
<div>
|
|
|
|
| 242 |
|
| 243 |
{/* Linked Deals */}
|
| 244 |
{model.modelLinkages && model.modelLinkages.length > 0 && (
|
| 245 |
+
<div className="card mb-6">
|
| 246 |
+
<h2 className="text-lg font-semibold mb-3">Linked Training Data Deals</h2>
|
| 247 |
<div className="space-y-3">
|
| 248 |
{model.modelLinkages.map((linkage) => (
|
| 249 |
<div
|
| 250 |
key={linkage.id}
|
| 251 |
+
className="p-4 border border-border-subtle rounded-none hover:bg-border-subtle/30 transition-colors"
|
| 252 |
>
|
| 253 |
<div className="flex items-start justify-between mb-2">
|
| 254 |
<div>
|
|
|
|
| 285 |
{/* Sources */}
|
| 286 |
{sources.length > 0 && (
|
| 287 |
<div className="card">
|
| 288 |
+
<h2 className="text-lg font-semibold mb-3">Sources</h2>
|
| 289 |
<div className="space-y-2">
|
| 290 |
{sources.map((source, idx) => (
|
| 291 |
<div key={idx}>
|
app/models/page.tsx
CHANGED
|
@@ -59,14 +59,12 @@ export default async function ModelsPage() {
|
|
| 59 |
/>
|
| 60 |
|
| 61 |
{/* Header */}
|
| 62 |
-
<div className="mb-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
</div>
|
| 69 |
-
</div>
|
| 70 |
|
| 71 |
{/* Stats */}
|
| 72 |
<div className="grid grid-cols-2 md:grid-cols-3 gap-3 mb-8">
|
|
@@ -94,9 +92,9 @@ export default async function ModelsPage() {
|
|
| 94 |
<table className="table text-sm">
|
| 95 |
<thead>
|
| 96 |
<tr className="border-b border-border-subtle">
|
| 97 |
-
<th className="
|
| 98 |
-
<th className="
|
| 99 |
-
<th className="
|
| 100 |
<Tooltip content="The number of trainable parameters in the model, typically measured in billions (B) or trillions (T). More parameters generally mean more capacity to learn complex patterns.">
|
| 101 |
<span className="underline decoration-dotted cursor-help">Params</span>
|
| 102 |
</Tooltip>
|
|
@@ -132,7 +130,7 @@ export default async function ModelsPage() {
|
|
| 132 |
key={model.id}
|
| 133 |
className="cursor-pointer transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 134 |
>
|
| 135 |
-
<td
|
| 136 |
<Link
|
| 137 |
href={`/models/${model.id}`}
|
| 138 |
className="font-medium text-accent hover:text-accent-hover"
|
|
@@ -143,10 +141,10 @@ export default async function ModelsPage() {
|
|
| 143 |
<div className="text-xs text-text-muted mt-0.5">{model.family}</div>
|
| 144 |
)}
|
| 145 |
</td>
|
| 146 |
-
<td
|
| 147 |
<div className="text-sm">{model.provider}</div>
|
| 148 |
</td>
|
| 149 |
-
<td
|
| 150 |
<div className="text-sm">{formatParams(model.params)}</div>
|
| 151 |
{model.isMoe && (
|
| 152 |
<Tooltip content="Mixture of Experts (MoE): A model architecture that uses multiple specialized sub-networks (experts) but only activates a subset for each input. This allows for larger models with lower computational costs.">
|
|
@@ -174,7 +172,7 @@ export default async function ModelsPage() {
|
|
| 174 |
<span className="text-text-muted/60">—</span>
|
| 175 |
)}
|
| 176 |
</td>
|
| 177 |
-
<td
|
| 178 |
<div className="flex items-center gap-2">
|
| 179 |
{model.architectureType && (
|
| 180 |
<span className="badge badge-secondary text-xs">
|
|
@@ -188,14 +186,14 @@ export default async function ModelsPage() {
|
|
| 188 |
)}
|
| 189 |
</div>
|
| 190 |
</td>
|
| 191 |
-
<td
|
| 192 |
{model.evidenceStrength && (
|
| 193 |
<span className="badge badge-secondary text-xs">
|
| 194 |
{model.evidenceStrength.replace('S-', '')}
|
| 195 |
</span>
|
| 196 |
)}
|
| 197 |
</td>
|
| 198 |
-
<td
|
| 199 |
<div className="text-sm text-text-muted/80">
|
| 200 |
{model.releaseDate ? formatDate(model.releaseDate instanceof Date ? model.releaseDate.toISOString() : String(model.releaseDate)) : '—'}
|
| 201 |
</div>
|
|
|
|
| 59 |
/>
|
| 60 |
|
| 61 |
{/* Header */}
|
| 62 |
+
<div className="mb-4">
|
| 63 |
+
<h1 className="text-3xl font-semibold mb-1">Model Registry</h1>
|
| 64 |
+
<p className="text-text-muted text-sm">
|
| 65 |
+
Training data scale estimates for major AI models
|
| 66 |
+
</p>
|
| 67 |
+
</div>
|
|
|
|
|
|
|
| 68 |
|
| 69 |
{/* Stats */}
|
| 70 |
<div className="grid grid-cols-2 md:grid-cols-3 gap-3 mb-8">
|
|
|
|
| 92 |
<table className="table text-sm">
|
| 93 |
<thead>
|
| 94 |
<tr className="border-b border-border-subtle">
|
| 95 |
+
<th className="text-left font-semibold">Model</th>
|
| 96 |
+
<th className="text-left font-semibold">Provider</th>
|
| 97 |
+
<th className="text-left font-semibold">
|
| 98 |
<Tooltip content="The number of trainable parameters in the model, typically measured in billions (B) or trillions (T). More parameters generally mean more capacity to learn complex patterns.">
|
| 99 |
<span className="underline decoration-dotted cursor-help">Params</span>
|
| 100 |
</Tooltip>
|
|
|
|
| 130 |
key={model.id}
|
| 131 |
className="cursor-pointer transition-colors border-b border-border-subtle last:border-0 hover:bg-[rgba(232,225,217,0.3)]"
|
| 132 |
>
|
| 133 |
+
<td>
|
| 134 |
<Link
|
| 135 |
href={`/models/${model.id}`}
|
| 136 |
className="font-medium text-accent hover:text-accent-hover"
|
|
|
|
| 141 |
<div className="text-xs text-text-muted mt-0.5">{model.family}</div>
|
| 142 |
)}
|
| 143 |
</td>
|
| 144 |
+
<td>
|
| 145 |
<div className="text-sm">{model.provider}</div>
|
| 146 |
</td>
|
| 147 |
+
<td>
|
| 148 |
<div className="text-sm">{formatParams(model.params)}</div>
|
| 149 |
{model.isMoe && (
|
| 150 |
<Tooltip content="Mixture of Experts (MoE): A model architecture that uses multiple specialized sub-networks (experts) but only activates a subset for each input. This allows for larger models with lower computational costs.">
|
|
|
|
| 172 |
<span className="text-text-muted/60">—</span>
|
| 173 |
)}
|
| 174 |
</td>
|
| 175 |
+
<td>
|
| 176 |
<div className="flex items-center gap-2">
|
| 177 |
{model.architectureType && (
|
| 178 |
<span className="badge badge-secondary text-xs">
|
|
|
|
| 186 |
)}
|
| 187 |
</div>
|
| 188 |
</td>
|
| 189 |
+
<td>
|
| 190 |
{model.evidenceStrength && (
|
| 191 |
<span className="badge badge-secondary text-xs">
|
| 192 |
{model.evidenceStrength.replace('S-', '')}
|
| 193 |
</span>
|
| 194 |
)}
|
| 195 |
</td>
|
| 196 |
+
<td>
|
| 197 |
<div className="text-sm text-text-muted/80">
|
| 198 |
{model.releaseDate ? formatDate(model.releaseDate instanceof Date ? model.releaseDate.toISOString() : String(model.releaseDate)) : '—'}
|
| 199 |
</div>
|
app/normalization/page.tsx
CHANGED
|
@@ -141,16 +141,16 @@ export default async function NormalizationPage() {
|
|
| 141 |
return (
|
| 142 |
<main className="min-h-screen bg-background">
|
| 143 |
<div className="container-content section-padding">
|
| 144 |
-
<div className="mb-
|
| 145 |
-
<h1 className="text-
|
| 146 |
-
<p className="text-text-muted text-
|
| 147 |
Compare deals on an apples-to-apples basis by normalizing to per-unit pricing
|
| 148 |
</p>
|
| 149 |
</div>
|
| 150 |
|
| 151 |
-
<div className="card mb-
|
| 152 |
-
<h2 className="text-
|
| 153 |
-
<p className="text-text-muted leading-relaxed mb-
|
| 154 |
Different deals use different pricing models (per-book, per-track, aggregate licensing, etc.).
|
| 155 |
This tool normalizes prices to common units (tokens, records, images, minutes) to enable
|
| 156 |
direct comparison.
|
|
|
|
| 141 |
return (
|
| 142 |
<main className="min-h-screen bg-background">
|
| 143 |
<div className="container-content section-padding">
|
| 144 |
+
<div className="mb-4">
|
| 145 |
+
<h1 className="text-3xl font-semibold mb-1">Pricing Normalization Tool</h1>
|
| 146 |
+
<p className="text-text-muted text-sm">
|
| 147 |
Compare deals on an apples-to-apples basis by normalizing to per-unit pricing
|
| 148 |
</p>
|
| 149 |
</div>
|
| 150 |
|
| 151 |
+
<div className="card mb-6">
|
| 152 |
+
<h2 className="text-xl font-semibold mb-3">How It Works</h2>
|
| 153 |
+
<p className="text-text-muted text-sm leading-relaxed mb-3">
|
| 154 |
Different deals use different pricing models (per-book, per-track, aggregate licensing, etc.).
|
| 155 |
This tool normalizes prices to common units (tokens, records, images, minutes) to enable
|
| 156 |
direct comparison.
|
app/page.tsx
CHANGED
|
@@ -176,11 +176,11 @@ export default async function Home() {
|
|
| 176 |
{/* Auto-enrich notification */}
|
| 177 |
<AutoEnrich dealCount={deals.length} dealsWithAllFields={dealsWithAllFields} />
|
| 178 |
{/* Header */}
|
| 179 |
-
<div className="mb-
|
| 180 |
-
<div className="flex items-center justify-between
|
| 181 |
<div>
|
| 182 |
-
<h1 className="text-
|
| 183 |
-
<p className="text-text-muted text-
|
| 184 |
Global licensing, acquisition, and commissioning deals (2020β2025)
|
| 185 |
</p>
|
| 186 |
</div>
|
|
@@ -224,7 +224,7 @@ export default async function Home() {
|
|
| 224 |
</div>
|
| 225 |
|
| 226 |
{/* Scrollable Analytics Cards */}
|
| 227 |
-
<div className="overflow-x-auto pb-2 -mx-
|
| 228 |
<div className="flex gap-4 min-w-max">
|
| 229 |
{/* Modality Breakdown */}
|
| 230 |
<div className="card min-w-[280px] flex-shrink-0">
|
|
|
|
| 176 |
{/* Auto-enrich notification */}
|
| 177 |
<AutoEnrich dealCount={deals.length} dealsWithAllFields={dealsWithAllFields} />
|
| 178 |
{/* Header */}
|
| 179 |
+
<div className="mb-4">
|
| 180 |
+
<div className="flex items-center justify-between">
|
| 181 |
<div>
|
| 182 |
+
<h1 className="text-3xl font-semibold mb-1">Deals Explorer</h1>
|
| 183 |
+
<p className="text-text-muted text-sm">
|
| 184 |
Global licensing, acquisition, and commissioning deals (2020β2025)
|
| 185 |
</p>
|
| 186 |
</div>
|
|
|
|
| 224 |
</div>
|
| 225 |
|
| 226 |
{/* Scrollable Analytics Cards */}
|
| 227 |
+
<div className="overflow-x-auto pb-2 -mx-2 px-2">
|
| 228 |
<div className="flex gap-4 min-w-max">
|
| 229 |
{/* Modality Breakdown */}
|
| 230 |
<div className="card min-w-[280px] flex-shrink-0">
|
app/timeline/page.tsx
CHANGED
|
@@ -208,8 +208,8 @@ export default async function TimelinePage() {
|
|
| 208 |
<div className="min-h-screen bg-background">
|
| 209 |
<div className="container-content section-padding">
|
| 210 |
{/* Header */}
|
| 211 |
-
<div className="mb-
|
| 212 |
-
<h1 className="text-
|
| 213 |
<p className="text-text-muted text-sm mb-1">Tracking the emergence of data markets</p>
|
| 214 |
<p className="text-text-muted text-xs font-medium">Source: Open Data Labs (opendatalabs.xyz)</p>
|
| 215 |
</div>
|
|
|
|
| 208 |
<div className="min-h-screen bg-background">
|
| 209 |
<div className="container-content section-padding">
|
| 210 |
{/* Header */}
|
| 211 |
+
<div className="mb-4">
|
| 212 |
+
<h1 className="text-3xl font-semibold mb-1 tracking-tight">Major AI Training Data Deals (2020β2025)</h1>
|
| 213 |
<p className="text-text-muted text-sm mb-1">Tracking the emergence of data markets</p>
|
| 214 |
<p className="text-text-muted text-xs font-medium">Source: Open Data Labs (opendatalabs.xyz)</p>
|
| 215 |
</div>
|
registry/enrich_all_models.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Batch enrichment script for all existing models
|
| 3 |
+
Enriches all models in the database with comprehensive metadata
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import List, Optional
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
import json
|
| 13 |
+
|
| 14 |
+
# Add project root to path
|
| 15 |
+
project_root = Path(__file__).parent.parent
|
| 16 |
+
sys.path.insert(0, str(project_root))
|
| 17 |
+
|
| 18 |
+
from registry.enrichment.comprehensive_enrichment import ComprehensiveModelEnricher
|
| 19 |
+
from dotenv import load_dotenv
|
| 20 |
+
|
| 21 |
+
# Prisma imports
|
| 22 |
+
try:
|
| 23 |
+
from prisma import Prisma
|
| 24 |
+
PRISMA_AVAILABLE = True
|
| 25 |
+
except ImportError:
|
| 26 |
+
PRISMA_AVAILABLE = False
|
| 27 |
+
print("Warning: Prisma not available. Install with: npm run db:generate")
|
| 28 |
+
|
| 29 |
+
load_dotenv()
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
async def enrich_all_models(
    limit: Optional[int] = None,
    use_web_search: bool = True,
    use_llm_extraction: bool = True
):
    """
    Enrich all models in the database.

    Loads every row of the model registry (optionally capped by ``limit``),
    runs each one through ``ComprehensiveModelEnricher.enrich_model`` and
    writes the resulting fields back. A failure on one model is reported and
    counted; it never aborts the batch.

    Args:
        limit: Limit number of models to enrich (for testing)
        use_web_search: Enable web search enrichment
        use_llm_extraction: Enable LLM extraction

    Raises:
        RuntimeError: If the Prisma client could not be imported.
    """
    if not PRISMA_AVAILABLE:
        raise RuntimeError("Prisma not available. Run: npm run db:generate")

    # Connect to database
    prisma = Prisma()
    await prisma.connect()

    try:
        # Get all models
        models = await prisma.modelregistry.find_many(take=limit)
        total = len(models)

        print(f"🚀 Starting enrichment of {total} models")
        print(f"   Started at: {datetime.now().isoformat()}\n")

        # Initialize enricher
        enricher = ComprehensiveModelEnricher(
            use_web_search=use_web_search,
            use_llm_extraction=use_llm_extraction
        )

        success_count = 0
        error_count = 0

        for i, model in enumerate(models, 1):
            print(f"\n[{i}/{total}] Enriching: {model.modelId} ({model.provider})")

            try:
                # Snapshot of what is already stored, so the enricher only
                # fills gaps for these fields.
                existing_data = {
                    "params": model.params,
                    "releaseDate": model.releaseDate.isoformat() if model.releaseDate else None,
                    "architectureType": model.architectureType,
                    "isMoe": model.isMoe,
                    "multimodal": model.multimodal,
                    "tokensEstMid": model.tokensEstMid,
                }

                enriched = await enricher.enrich_model(
                    model_id=model.modelId,
                    provider=model.provider,
                    family=model.family,
                    existing_data=existing_data
                )

                update_data = _build_update_data(model, enriched)

                if update_data:
                    update_data["updatedAt"] = datetime.now()
                    await prisma.modelregistry.update(
                        where={"id": model.id},
                        data=update_data
                    )
                    # The count includes the updatedAt stamp added above.
                    print(f"   ✅ Updated {len(update_data)} fields")
                else:
                    print(f"   ⚠️  No new data to update")
                # A model with nothing new to write still counts as processed.
                success_count += 1

            except Exception as e:
                # Best effort: report and move on to the next model.
                print(f"   ❌ Error: {e}")
                error_count += 1

        print(f"\n✅ Enrichment complete!")
        print(f"   Successfully enriched: {success_count}/{total}")
        print(f"   Errors: {error_count}")
        print(f"   Completed at: {datetime.now().isoformat()}")

    finally:
        await prisma.disconnect()


# Enriched fields that must be stored as datetime objects.
_DATETIME_FIELDS = ("releaseDate", "trainingPeriodStart", "trainingPeriodEnd")

# Enriched fields copied whenever they are not None (False is a real value).
_COPY_IF_SET = ("isMoe", "multimodal")

# Enriched fields copied only when truthy.
_COPY_IF_TRUTHY = (
    "architectureType",
    "numExperts",
    "tokensEstMin",
    "tokensEstMax",
    "tokensEstMid",
    "tokensRangeGeneratedAt",
    "evidenceTypes",
    "evidenceStrength",
    "uncertaintySources",
    "evidenceProfileGeneratedAt",
    "sources",
    "rawEvidenceSnippets",
    "compositionEstimates",
)


def _coerce_datetime(value):
    """Return ``value`` as a datetime, parsing ISO-8601 strings.

    A trailing ``Z`` is rewritten to ``+00:00`` first because
    ``datetime.fromisoformat`` rejects it before Python 3.11.
    """
    if isinstance(value, str):
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    return value


def _build_update_data(model, enriched):
    """Translate an enrichment result into the payload for the registry update."""
    update_data = {}

    for field in _DATETIME_FIELDS:
        if enriched.get(field):
            update_data[field] = _coerce_datetime(enriched[field])

    for field in _COPY_IF_SET:
        if enriched.get(field) is not None:
            update_data[field] = enriched[field]

    for field in _COPY_IF_TRUTHY:
        if enriched.get(field):
            update_data[field] = enriched[field]

    # Parameters (only if not already set)
    if not model.params and enriched.get("params"):
        update_data["params"] = enriched["params"]

    return update_data
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
async def main():
    """Command-line entry point: parse the flags and launch the batch enrichment."""
    import argparse

    cli = argparse.ArgumentParser(description="Enrich all models in database")
    # (flag, keyword arguments) pairs, registered in the order shown in --help.
    flag_specs = (
        ("--limit", {"type": int, "help": "Limit number of models to enrich"}),
        ("--no-web", {"action": "store_true", "help": "Disable web search enrichment"}),
        ("--no-llm", {"action": "store_true", "help": "Disable LLM extraction"}),
    )
    for flag, options in flag_specs:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args()

    # The CLI exposes opt-out switches; the enrichment function takes opt-in booleans.
    web_enabled = not parsed.no_web
    llm_enabled = not parsed.no_llm

    await enrich_all_models(
        limit=parsed.limit,
        use_web_search=web_enabled,
        use_llm_extraction=llm_enabled
    )
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
if __name__ == "__main__":
|
| 206 |
+
asyncio.run(main())
|
| 207 |
+
|
registry/enrichment/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Model enrichment module for web-based data collection
|
| 3 |
+
"""
|
| 4 |
+
|
registry/enrichment/comprehensive_enrichment.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Comprehensive Model Enrichment Pipeline
|
| 3 |
+
Orchestrates all enrichment sources and merges data
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import sys
|
| 7 |
+
import asyncio
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Dict, List, Optional, Any
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
import json
|
| 12 |
+
|
| 13 |
+
# Add project root to path
|
| 14 |
+
project_root = Path(__file__).parent.parent.parent
|
| 15 |
+
sys.path.insert(0, str(project_root))
|
| 16 |
+
|
| 17 |
+
from registry.collectors.epoch_collector import EpochCollector
|
| 18 |
+
from registry.collectors.hf_collector import HuggingFaceCollector
|
| 19 |
+
from registry.enrichment.web_enrichment import WebModelEnricher
|
| 20 |
+
from registry.evidence_profile import EvidenceProfileManager
|
| 21 |
+
from registry.inference.reconciliation import TokenInferenceReconciler
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class ComprehensiveModelEnricher:
    """Orchestrates all enrichment sources for comprehensive model metadata.

    Combines the Epoch collector, the HuggingFace collector and (optionally)
    the web enricher, merges their output over any existing record, runs token
    inference when a parameter count is known, and attaches an evidence
    profile plus the list of consulted sources.
    """

    # Family keywords used by the loose provider-based match in _matches_model.
    _FAMILY_KEYWORDS = ("gpt", "claude", "gemini", "llama", "mistral")
    # URL fragments treated as first-party sources (evidence type "E1").
    _OFFICIAL_DOMAINS = ("openai.com", "anthropic.com", "google.com", "meta.com")
    # Epoch modality labels that mark a model as multimodal.
    _MULTIMODAL_LABELS = ("multimodal", "vision", "image")

    def __init__(
        self,
        use_web_search: bool = True,
        use_llm_extraction: bool = True,
        exa_api_key: Optional[str] = None,
        llm_provider: str = "openai",
        llm_api_key: Optional[str] = None
    ):
        """
        Initialize comprehensive enricher

        Args:
            use_web_search: Enable web search enrichment
            use_llm_extraction: Enable LLM extraction
            exa_api_key: Exa API key
            llm_provider: LLM provider ("openai" or "anthropic")
            llm_api_key: LLM API key
        """
        self.epoch_collector = EpochCollector()
        self.hf_collector = HuggingFaceCollector()
        self.inference_reconciler = TokenInferenceReconciler()
        self.evidence_manager = EvidenceProfileManager()

        # NOTE(review): this flag used to be accepted and silently discarded.
        # It is now kept on the instance, but nothing visible here forwards it
        # to WebModelEnricher -- confirm that class's signature and wire it up.
        self.use_llm_extraction = use_llm_extraction

        if use_web_search:
            try:
                self.web_enricher = WebModelEnricher(
                    exa_api_key=exa_api_key,
                    llm_provider=llm_provider,
                    llm_api_key=llm_api_key
                )
            except Exception as e:
                # Web enrichment is optional; fall back to the other sources.
                print(f"Warning: Web enricher initialization failed: {e}")
                self.web_enricher = None
        else:
            self.web_enricher = None

    async def enrich_model(
        self,
        model_id: str,
        provider: str,
        family: Optional[str] = None,
        existing_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Enrich model with data from all sources

        Each source is queried independently; a failure in one is printed and
        that source is simply skipped.

        Args:
            model_id: Model identifier
            provider: Model provider
            family: Model family (defaults to the provider when omitted)
            existing_data: Existing model data; copied, never mutated

        Returns:
            Comprehensive enriched metadata. List-like values (evidence types,
            uncertainty sources, sources, snippets, composition) are stored as
            JSON strings.
        """
        # Start with existing data or empty dict
        enriched = existing_data.copy() if existing_data else {}
        enriched["modelId"] = model_id
        enriched["provider"] = provider
        enriched["family"] = family or provider

        # Source 1: Epoch AI
        epoch_data = None
        try:
            # Epoch collector may be async or sync, handle both
            if hasattr(self.epoch_collector, 'fetch_notable_models'):
                epoch_models = self.epoch_collector.fetch_notable_models()
                if asyncio.iscoroutine(epoch_models):
                    epoch_models = await epoch_models
                for candidate in epoch_models or []:
                    if self._matches_model(candidate.get("model_name", ""), model_id, provider):
                        epoch_data = candidate
                        break
        except Exception as e:
            print(f"Epoch collection error: {e}")

        # Source 2: HuggingFace
        hf_data = None
        try:
            # HF collector may be async or sync, handle both
            if hasattr(self.hf_collector, 'search_models'):
                hf_result = self.hf_collector.search_models(model_id)
                if asyncio.iscoroutine(hf_result):
                    hf_result = await hf_result
                if hf_result:
                    # Only the first hit is used when a list comes back.
                    hf_data = hf_result[0] if isinstance(hf_result, list) else hf_result
        except Exception as e:
            print(f"HF collection error: {e}")

        # Source 3: Web search (if enabled)
        web_data = None
        if self.web_enricher:
            try:
                web_data = self.web_enricher.enrich_model(
                    model_id=model_id,
                    provider=provider,
                    existing_data=enriched
                )
                # Same sync/async tolerance as the two collectors above.
                if asyncio.iscoroutine(web_data):
                    web_data = await web_data
            except Exception as e:
                print(f"Web enrichment error: {e}")
                web_data = None

        # Merge all sources with priority
        merged = self._merge_sources(
            enriched,
            epoch_data,
            hf_data,
            web_data
        )

        # Run token inference if we have params
        if merged.get("params"):
            try:
                inference_input = {
                    "params": merged.get("params"),
                    # NOTE(review): this value comes from Epoch's
                    # "compute_PF_days" field without conversion -- confirm
                    # the unit the reconciler expects under "flops".
                    "flops": merged.get("flopsReported"),
                    "architecture": {
                        "is_moe": merged.get("isMoe", False),
                        "num_experts": merged.get("numExperts"),
                    },
                    "provider": provider,
                    "model_id": model_id,
                }
                inference_result = self.inference_reconciler.reconcile(inference_input)

                # Add token estimates
                merged["tokensEstMin"] = inference_result.get("min")
                merged["tokensEstMax"] = inference_result.get("max")
                merged["tokensEstMid"] = inference_result.get("mid")
                merged["tokensRangeGeneratedAt"] = datetime.now()
            except Exception as e:
                print(f"Token inference error: {e}")

        # Generate evidence profile
        evidence_profile = self._generate_evidence_profile(
            epoch_data,
            hf_data,
            web_data,
            merged
        )

        # Add evidence profile fields
        merged["evidenceTypes"] = json.dumps(evidence_profile.get("evidence_types", []))
        merged["evidenceStrength"] = evidence_profile.get("strength")
        merged["uncertaintySources"] = json.dumps(evidence_profile.get("uncertainty", []))
        merged["evidenceProfileGeneratedAt"] = datetime.now()

        # Combine sources
        sources = []
        if epoch_data and epoch_data.get("source_url"):
            sources.append({
                "type": "epoch",
                "url": epoch_data.get("source_url"),
                "retrieved_at": datetime.now().isoformat(),
            })
        if hf_data and hf_data.get("url"):
            sources.append({
                "type": "huggingface",
                "url": hf_data.get("url"),
                "retrieved_at": datetime.now().isoformat(),
            })
        if web_data:
            # "or []" guards against a key that is present but None.
            sources.extend(web_data.get("sources") or [])

        if sources:
            merged["sources"] = json.dumps(sources)

        # Add raw evidence snippets
        raw_snippets = []
        if web_data:
            raw_snippets.extend(web_data.get("raw_evidence_snippets") or [])

        if raw_snippets:
            merged["rawEvidenceSnippets"] = json.dumps(raw_snippets)

        # Add composition estimates if training data info found
        if web_data and web_data.get("training_data_composition"):
            composition = {
                "description": web_data.get("training_data_composition"),
                "sources": web_data.get("training_data_sources", []),
            }
            merged["compositionEstimates"] = json.dumps(composition)

        return merged

    def _matches_model(self, name: str, model_id: str, provider: str) -> bool:
        """Check whether a source's model name refers to the requested model.

        Matching is case-insensitive. A blank or missing name never matches:
        an empty string is a substring of everything, so without this guard
        any unnamed source row would be taken as a hit.

        Args:
            name: Model name reported by the source (may be empty or None)
            model_id: Identifier of the model being enriched
            provider: Provider of the model being enriched

        Returns:
            True when one name contains the other, or when the source name
            mentions the provider and both sides mention a known family.
        """
        name_lower = (name or "").strip().lower()
        model_lower = (model_id or "").strip().lower()
        provider_lower = (provider or "").strip().lower()

        if not name_lower or not model_lower:
            return False

        # Containment in either direction
        if model_lower in name_lower or name_lower in model_lower:
            return True

        # Provider match + partial model match
        # NOTE(review): the two sides are not required to name the SAME
        # family, so this can pair different models of one provider.
        if provider_lower and provider_lower in name_lower:
            model_has_family = any(k in model_lower for k in self._FAMILY_KEYWORDS)
            name_has_family = any(k in name_lower for k in self._FAMILY_KEYWORDS)
            if model_has_family and name_has_family:
                return True

        return False

    def _merge_sources(
        self,
        base: Dict[str, Any],
        epoch_data: Optional[Dict[str, Any]],
        hf_data: Optional[Dict[str, Any]],
        web_data: Optional[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Merge data from all sources over a copy of ``base``.

        Epoch and HuggingFace only fill fields that are still empty, in that
        order. Web data fills empty release date / architecture fields, but
        its MoE, expert-count, multimodal and training-period values replace
        whatever is already there.

        Args:
            base: Existing record; not mutated
            epoch_data: Matching Epoch row, if any
            hf_data: Matching HuggingFace result, if any
            web_data: Web enrichment result, if any

        Returns:
            New dict with the merged fields.
        """
        merged = base.copy()

        # Priority: Direct disclosures > Third-party analysis > Inferred

        # From Epoch (high priority - curated dataset)
        if epoch_data:
            if not merged.get("params") and epoch_data.get("parameter_count"):
                # Source count is divided by 1e9 before it is stored.
                merged["params"] = epoch_data.get("parameter_count") / 1e9
            if not merged.get("releaseDate") and epoch_data.get("release_date"):
                merged["releaseDate"] = self._parse_date(epoch_data.get("release_date"))
            epoch_arch = epoch_data.get("architecture_type")
            if not merged.get("architectureType") and epoch_arch:
                merged["architectureType"] = epoch_arch
                if str(epoch_arch).lower() == "moe":
                    merged["isMoe"] = True
            if not merged.get("multimodal"):
                # The key may be present with a None value; treat as unknown.
                modality = str(epoch_data.get("modality") or "").lower()
                merged["multimodal"] = modality in self._MULTIMODAL_LABELS
            if not merged.get("flopsReported") and epoch_data.get("compute_PF_days"):
                merged["flopsReported"] = epoch_data.get("compute_PF_days")

        # From HuggingFace (medium priority)
        if hf_data:
            if not merged.get("params") and hf_data.get("params"):
                merged["params"] = hf_data.get("params") / 1e9
            if not merged.get("releaseDate") and hf_data.get("created_at"):
                merged["releaseDate"] = self._parse_date(hf_data.get("created_at"))

        # From Web search (lower priority but comprehensive)
        if web_data:
            if not merged.get("releaseDate") and web_data.get("release_date"):
                merged["releaseDate"] = self._parse_date(web_data.get("release_date"))
            if not merged.get("architectureType") and web_data.get("architecture_type"):
                merged["architectureType"] = web_data.get("architecture_type")
            # NOTE(review): the fields below overwrite existing values even
            # though web data is described as lower priority -- confirm intent.
            if web_data.get("is_moe") is not None:
                merged["isMoe"] = web_data.get("is_moe")
            if web_data.get("num_experts"):
                merged["numExperts"] = web_data.get("num_experts")
            if web_data.get("multimodal") is not None:
                merged["multimodal"] = web_data.get("multimodal")
            if web_data.get("training_period_start"):
                merged["trainingPeriodStart"] = self._parse_date(web_data.get("training_period_start"))
            if web_data.get("training_period_end"):
                merged["trainingPeriodEnd"] = self._parse_date(web_data.get("training_period_end"))

        return merged

    def _generate_evidence_profile(
        self,
        epoch_data: Optional[Dict[str, Any]],
        hf_data: Optional[Dict[str, Any]],
        web_data: Optional[Dict[str, Any]],
        merged: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Generate evidence profile from all sources.

        Args:
            epoch_data: Matching Epoch row, if any
            hf_data: Matching HuggingFace result, if any
            web_data: Web enrichment result, if any
            merged: Merged record produced by ``_merge_sources``

        Returns:
            Dict with ``evidence_types`` (sorted list of codes), ``strength``
            ("S-High", "S-Medium" or "S-Low") and ``uncertainty`` (list of
            codes for fields that are still missing).
        """
        evidence_types = set()
        sources_count = 0

        # Epoch data - usually E4 (third-party analysis)
        if epoch_data:
            sources_count += 1
            evidence_types.add("E4")
            if epoch_data.get("parameter_count"):
                evidence_types.add("E3")  # Architecture evidence

        # HuggingFace - E4 (third-party)
        if hf_data:
            sources_count += 1
            evidence_types.add("E4")

        # Web data - can be E1-E5 depending on source
        if web_data:
            web_sources = web_data.get("sources") or []
            sources_count += len(web_sources)
            evidence_types.update(web_data.get("evidence_types") or [])

            # Direct disclosure if we have official sources
            # NOTE(review): substring test on the whole URL, so a path or an
            # unrelated host containing one of these fragments also counts.
            for source in web_sources:
                url = (source.get("url") or "").lower()
                if any(domain in url for domain in self._OFFICIAL_DOMAINS):
                    evidence_types.add("E1")  # Direct disclosure

        # Compute evidence
        if merged.get("flopsReported"):
            evidence_types.add("E2")

        # Architecture evidence
        if merged.get("params") or merged.get("architectureType"):
            evidence_types.add("E3")

        # Calculate strength
        if sources_count >= 3 and "E1" in evidence_types:
            strength = "S-High"
        elif sources_count >= 2 or "E1" in evidence_types:
            strength = "S-Medium"
        else:
            strength = "S-Low"

        # Identify uncertainties: one code per field that is still unknown.
        uncertainty = []
        if not merged.get("releaseDate"):
            uncertainty.append("U5")
        if not merged.get("architectureType"):
            uncertainty.append("U3")
        if not merged.get("trainingPeriodStart"):
            uncertainty.append("U2")

        return {
            # Sorted so the stored JSON is identical from run to run.
            "evidence_types": sorted(evidence_types),
            "strength": strength,
            "uncertainty": uncertainty,
        }

    def _parse_date(self, date_value: Any) -> Optional[datetime]:
        """Parse date from various formats.

        Accepts a datetime (returned as is), an ISO-8601 string (a trailing
        "Z" is read as UTC), a four-digit year string, or an integer year.

        Args:
            date_value: Value to interpret

        Returns:
            The parsed datetime (1 January for bare years), or None when the
            value is empty, unparseable or out of range.
        """
        if not date_value:
            return None

        if isinstance(date_value, datetime):
            return date_value

        if isinstance(date_value, str):
            text = date_value.strip()
            try:
                return datetime.fromisoformat(text.replace("Z", "+00:00"))
            except ValueError:
                if len(text) == 4 and text.isdigit():
                    return self._year_start(int(text))
                return None

        # bool is a subclass of int; True must not be read as the year 1.
        if isinstance(date_value, int) and not isinstance(date_value, bool):
            return self._year_start(date_value)

        return None

    @staticmethod
    def _year_start(year: int) -> Optional[datetime]:
        """Return 1 January of ``year``, or None if datetime cannot represent it."""
        try:
            return datetime(year, 1, 1)
        except (ValueError, OverflowError):
            return None
|
| 370 |
+
|
registry/enrichment/llm_extractor.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLM Extraction Service
|
| 3 |
+
Uses OpenAI or Anthropic API to extract structured data from web content
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
from typing import Dict, List, Optional, Any
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class LLMExtractor:
    """Extracts structured model metadata from web content using an LLM.

    The extractor sends a prompt to either the OpenAI or the Anthropic API,
    parses the JSON reply and normalizes it into a fixed set of keys (see
    ``_empty_extraction`` for the full key list).
    """

    # Maximum number of characters of web content embedded in the prompt.
    MAX_CONTENT_CHARS = 8000

    def __init__(self, provider: str = "openai", api_key: Optional[str] = None):
        """
        Initialize LLM extractor

        Args:
            provider: "openai" or "anthropic" (case-insensitive)
            api_key: API key (if None, reads OPENAI_API_KEY or
                ANTHROPIC_API_KEY from the environment)

        Raises:
            ValueError: If the provider is unsupported or no API key is available.
            ImportError: If the provider's client package is not installed.
        """
        self.provider = provider.lower()

        # Validate the provider before the key lookup so that a typo is
        # reported as "unsupported provider" rather than "missing API key".
        if self.provider not in ("openai", "anthropic"):
            raise ValueError(f"Unsupported provider: {provider}")

        self.api_key = api_key or os.getenv(
            "OPENAI_API_KEY" if self.provider == "openai" else "ANTHROPIC_API_KEY"
        )

        if not self.api_key:
            raise ValueError(
                f"{provider.upper()}_API_KEY environment variable required"
            )

        # Client packages are imported lazily so only the selected one is needed.
        if self.provider == "openai":
            try:
                import openai
            except ImportError as exc:
                raise ImportError(
                    "openai package required. Install with: pip install openai"
                ) from exc
            self.client = openai.OpenAI(api_key=self.api_key)
        else:  # anthropic
            try:
                import anthropic
            except ImportError as exc:
                raise ImportError(
                    "anthropic package required. Install with: pip install anthropic"
                ) from exc
            self.client = anthropic.Anthropic(api_key=self.api_key)

    def extract_model_metadata(
        self,
        model_id: str,
        provider: str,
        web_content: str,
        context: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Extract structured metadata from web content

        Args:
            model_id: Model identifier (e.g., "GPT-4")
            provider: Model provider (e.g., "OpenAI")
            web_content: Text content from web sources
            context: Additional context (existing model data)

        Returns:
            Dict with extracted metadata. On any API or parsing failure the
            error is printed and the result of ``_empty_extraction`` is returned.
        """
        prompt = self._build_extraction_prompt(model_id, provider, web_content, context)

        try:
            if self.provider == "openai":
                response = self.client.chat.completions.create(
                    model="gpt-4o-mini",  # Use cheaper model for extraction
                    messages=[
                        {"role": "system", "content": "You are a data extraction assistant. Extract structured information about AI models from text. Always return valid JSON."},
                        {"role": "user", "content": prompt}
                    ],
                    response_format={"type": "json_object"},
                    temperature=0.1,  # Low temperature for consistent extraction
                )
                result_text = response.choices[0].message.content
            else:  # anthropic
                response = self.client.messages.create(
                    model="claude-3-5-sonnet-20241022",
                    max_tokens=2000,
                    system="You are a data extraction assistant. Extract structured information about AI models from text. Always return valid JSON.",
                    messages=[
                        {"role": "user", "content": prompt}
                    ],
                )
                result_text = response.content[0].text

            # The reply may be wrapped in a Markdown code fence; unwrap it
            # before parsing so such replies are not discarded.
            extracted = json.loads(self._strip_code_fences(result_text))

            if not isinstance(extracted, dict):
                print(f"LLM extraction error: expected JSON object, got {type(extracted).__name__}")
                return self._empty_extraction()

            # Validate and normalize
            return self._validate_extraction(extracted, model_id, provider)

        except json.JSONDecodeError as e:
            print(f"JSON decode error: {e}")
            return self._empty_extraction()
        except Exception as e:
            # Deliberate best-effort: extraction failure must not abort the caller.
            print(f"LLM extraction error: {e}")
            return self._empty_extraction()

    @staticmethod
    def _strip_code_fences(text: Optional[str]) -> str:
        """Return ``text`` without a surrounding ``` / ```json Markdown fence."""
        stripped = (text or "").strip()
        fenced = re.match(r'^```[a-zA-Z]*\s*\n?(.*?)\n?```$', stripped, re.DOTALL)
        return fenced.group(1).strip() if fenced else stripped

    @staticmethod
    def _as_list(value: Any) -> List[Any]:
        """Coerce a JSON value to a list (None -> [], scalar -> [scalar])."""
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    def _build_extraction_prompt(
        self,
        model_id: str,
        provider: str,
        web_content: str,
        context: Optional[Dict[str, Any]] = None
    ) -> str:
        """Build the extraction prompt.

        Only the first ``MAX_CONTENT_CHARS`` characters of ``web_content`` are
        embedded. ``context``, when given, is appended as pretty-printed JSON;
        values JSON cannot encode (e.g. ``datetime``) are rendered with ``str``.
        """
        context_str = ""
        if context:
            # default=str keeps datetime (and similar) values from raising TypeError.
            context_str = f"\n\nExisting known information:\n{json.dumps(context, indent=2, default=str)}"

        # Truncate here rather than inside the template so that no code
        # comment leaks into the text sent to the model.
        content_excerpt = (web_content or "")[:self.MAX_CONTENT_CHARS]

        prompt = f"""Extract structured information about the AI model "{model_id}" by {provider} from the following text.

Text content:
{content_excerpt}
{context_str}

Extract the following information and return as JSON:
{{
  "release_date": "YYYY-MM-DD or null if not found",
  "architecture_type": "Transformer, MoE, or null",
  "is_moe": true/false/null,
  "num_experts": number or null,
  "multimodal": true/false/null,
  "training_data_sources": ["source1", "source2"] or [],
  "training_data_composition": "description or null",
  "training_period_start": "YYYY-MM-DD or null",
  "training_period_end": "YYYY-MM-DD or null",
  "evidence_types": ["E1", "E2", "E3", "E4", "E5"] based on disclosure level,
  "confidence": "high/medium/low",
  "raw_snippets": ["relevant quote 1", "relevant quote 2"]
}}

Evidence type mapping:
- E1: Direct disclosure (company blog, paper, official announcement)
- E2: Compute evidence (FLOPs, hardware mentioned)
- E3: Architecture evidence (parameters, MoE details)
- E4: Third-party analysis (research paper, news article)
- E5: Qualitative hints (vague mentions, speculation)

Return only valid JSON, no additional text."""

        return prompt

    def _validate_extraction(
        self,
        extracted: Dict[str, Any],
        model_id: str,
        provider: str
    ) -> Dict[str, Any]:
        """Validate and normalize extracted data.

        Dates are normalized through ``_parse_date``, list-valued fields are
        always returned as lists, and a non-empty architecture type is
        collapsed to either "MoE" (which also sets ``is_moe``) or "Transformer".
        ``model_id`` and ``provider`` are accepted but not used here.
        """
        validated = {
            "release_date": self._parse_date(extracted.get("release_date")),
            "architecture_type": extracted.get("architecture_type"),
            "is_moe": extracted.get("is_moe"),
            "num_experts": extracted.get("num_experts"),
            "multimodal": extracted.get("multimodal"),
            # JSON null would otherwise bypass the [] default of dict.get
            "training_data_sources": self._as_list(extracted.get("training_data_sources")),
            "training_data_composition": extracted.get("training_data_composition"),
            "training_period_start": self._parse_date(extracted.get("training_period_start")),
            "training_period_end": self._parse_date(extracted.get("training_period_end")),
            "evidence_types": self._as_list(extracted.get("evidence_types")),
            "confidence": extracted.get("confidence") or "medium",
            "raw_snippets": self._as_list(extracted.get("raw_snippets")),
        }

        # Normalize architecture type
        if validated["architecture_type"]:
            # str() guards against a non-string JSON value from the model.
            arch = str(validated["architecture_type"]).lower()
            if "moe" in arch or "mixture" in arch:
                validated["architecture_type"] = "MoE"
                validated["is_moe"] = True
            else:
                # "transformer" and anything unrecognized both map to the default.
                validated["architecture_type"] = "Transformer"

        return validated

    def _parse_date(self, date_str: Optional[str]) -> Optional[str]:
        """Normalize a date value to a ``YYYY-MM-DD`` string.

        Tries, in order: an exact (and valid) ``YYYY-MM-DD`` date, any string
        ``datetime.fromisoformat`` accepts, and finally the first 20xx year
        found in the text (returned as January 1st of that year).

        Returns:
            The normalized date string, or ``None`` if nothing usable is found.
        """
        if date_str is None:
            return None

        # The LLM may return a number (e.g. 2023) instead of a string.
        text = str(date_str).strip()
        if not text or text.lower() in ["null", "none"]:
            return None

        # YYYY-MM-DD: accept only if it is a real calendar date
        if re.match(r'^\d{4}-\d{2}-\d{2}$', text):
            try:
                datetime.strptime(text, "%Y-%m-%d")
                return text
            except ValueError:
                pass  # e.g. "2023-13-45": fall through to the year-only fallback
        else:
            # Try to parse other ISO-8601 forms (timestamps, trailing "Z")
            try:
                dt = datetime.fromisoformat(text.replace('Z', '+00:00'))
                return dt.strftime("%Y-%m-%d")
            except (ValueError, AttributeError):
                pass

        # Try year only
        year_match = re.search(r'\b(20\d{2})\b', text)
        if year_match:
            return f"{year_match.group(1)}-01-01"

        return None

    def _empty_extraction(self) -> Dict[str, Any]:
        """Return the result used when extraction fails: every field empty, confidence "low"."""
        return {
            "release_date": None,
            "architecture_type": None,
            "is_moe": None,
            "num_experts": None,
            "multimodal": None,
            "training_data_sources": [],
            "training_data_composition": None,
            "training_period_start": None,
            "training_period_end": None,
            "evidence_types": [],
            "confidence": "low",
            "raw_snippets": [],
        }
|
| 227 |
+
|
registry/enrichment/web_enrichment.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Web Search Enrichment Service
|
| 3 |
+
Uses Exa API to search for model information and extract metadata
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Dict, List, Optional, Any
|
| 10 |
+
from datetime import datetime, timedelta
|
| 11 |
+
import json
|
| 12 |
+
|
| 13 |
+
# Add project root to path
|
| 14 |
+
project_root = Path(__file__).parent.parent.parent
|
| 15 |
+
sys.path.insert(0, str(project_root))
|
| 16 |
+
|
| 17 |
+
try:
|
| 18 |
+
from ingestion.exa_client import ExaClient
|
| 19 |
+
except ImportError:
|
| 20 |
+
# Fallback if exa_client not available
|
| 21 |
+
ExaClient = None
|
| 22 |
+
|
| 23 |
+
from .llm_extractor import LLMExtractor
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class WebModelEnricher:
    """Enriches model metadata using web search via Exa API.

    Search results are deduplicated, ranked by score, their content is
    combined into one text and, when an LLM extractor is available, turned
    into structured fields.
    """

    def __init__(
        self,
        exa_api_key: Optional[str] = None,
        llm_provider: str = "openai",
        llm_api_key: Optional[str] = None
    ):
        """
        Initialize web enricher

        Initialization is best-effort: if either the Exa client or the LLM
        extractor cannot be created, a warning is printed and the
        corresponding attribute is set to ``None``.

        Args:
            exa_api_key: Exa API key (if None, reads from env)
            llm_provider: LLM provider ("openai" or "anthropic")
            llm_api_key: LLM API key (if None, reads from env)
        """
        # Initialize Exa client (ExaClient is None when its import failed)
        self.exa_client = None
        if ExaClient:
            try:
                self.exa_client = ExaClient(api_key=exa_api_key)
            except Exception as e:
                print(f"Warning: Exa client initialization failed: {e}")

        # Initialize LLM extractor
        try:
            self.llm_extractor = LLMExtractor(provider=llm_provider, api_key=llm_api_key)
        except Exception as e:
            print(f"Warning: LLM extractor initialization failed: {e}")
            self.llm_extractor = None

    def enrich_model(
        self,
        model_id: str,
        provider: str,
        existing_data: Optional[Dict[str, Any]] = None,
        num_search_results: int = 5
    ) -> Dict[str, Any]:
        """
        Enrich model metadata using web search

        Args:
            model_id: Model identifier
            provider: Model provider
            existing_data: Existing model data (for context)
            num_search_results: Number of results requested per query, and
                the number of top-ranked results kept as sources

        Returns:
            Dict with enriched metadata. When no Exa client is configured or
            no search result is found, the ``_empty_enrichment`` result is
            returned.
        """
        if not self.exa_client:
            return self._empty_enrichment()

        # Search for information; a failing query is logged and skipped
        all_results = []
        for query in self._build_search_queries(model_id, provider):
            try:
                all_results.extend(
                    self.exa_client.search(query=query, num_results=num_search_results)
                )
            except Exception as e:
                print(f"Exa search error for query '{query}': {e}")

        if not all_results:
            return self._empty_enrichment()

        # Deduplicate by URL, keeping the first occurrence
        seen_urls = set()
        unique_results = []
        for result in all_results:
            if result.url not in seen_urls:
                seen_urls.add(result.url)
                unique_results.append(result)

        # Sort by score, best first. A missing score is ranked as 0 so that
        # comparing None with a float cannot raise TypeError.
        unique_results.sort(
            key=lambda r: r.score if r.score is not None else 0.0,
            reverse=True,
        )
        top_results = unique_results[:num_search_results]

        # Fetch content from top results
        contents = {}
        try:
            contents = self.exa_client.get_contents([r.url for r in top_results]) or {}
        except Exception as e:
            print(f"Error fetching contents: {e}")

        # Combine all content
        combined_content = self._combine_content(unique_results, contents)

        # Extract structured data using LLM
        extracted = {}
        if self.llm_extractor and combined_content:
            try:
                extracted = self.llm_extractor.extract_model_metadata(
                    model_id=model_id,
                    provider=provider,
                    web_content=combined_content,
                    context=existing_data
                ) or {}
            except Exception as e:
                print(f"LLM extraction error: {e}")

        # With nothing extracted there is no basis for "medium" confidence;
        # report "low", matching _empty_enrichment().
        confidence = (extracted.get("confidence") or "medium") if extracted else "low"

        # NOTE(review): every snippet is attributed to the top-ranked URL
        # because the extractor does not report which source a quote came from.
        primary_url = unique_results[0].url

        # Build enrichment result
        enrichment = {
            "release_date": extracted.get("release_date"),
            "architecture_type": extracted.get("architecture_type"),
            "is_moe": extracted.get("is_moe"),
            "num_experts": extracted.get("num_experts"),
            "multimodal": extracted.get("multimodal"),
            "training_data_sources": extracted.get("training_data_sources") or [],
            "training_data_composition": extracted.get("training_data_composition"),
            "training_period_start": extracted.get("training_period_start"),
            "training_period_end": extracted.get("training_period_end"),
            "evidence_types": extracted.get("evidence_types") or [],
            "confidence": confidence,
            "sources": [
                {
                    "type": "web_search",
                    "url": result.url,
                    "title": result.title,
                    "score": result.score,
                    "published_date": result.published_date,
                }
                for result in top_results
            ],
            "raw_evidence_snippets": [
                {
                    "text": snippet,
                    "source_url": primary_url,
                }
                for snippet in (extracted.get("raw_snippets") or [])
            ],
        }

        return enrichment

    def _build_search_queries(self, model_id: str, provider: str) -> List[str]:
        """Build search queries for model information.

        Returns four generic queries, plus an "official announcement" query
        for OpenAI, Anthropic, Google and Meta (case-insensitive match).
        """
        queries = [
            f"{model_id} {provider} release date architecture training data",
            f"{model_id} {provider} system card technical details",
            f"{model_id} {provider} training dataset sources",
            f"{model_id} {provider} model card paper",
        ]

        # Add provider-specific queries
        if provider.lower() in ["openai", "anthropic", "google", "meta"]:
            queries.append(f"{provider} {model_id} official announcement blog")

        return queries

    def _combine_content(
        self,
        results: List[Any],
        contents: Dict[str, str]
    ) -> str:
        """Combine content from search results into one text.

        For each result the fetched full content is preferred; if none is
        available (missing or empty), the result's title and summary are
        used instead. Results with neither are skipped. Sections are
        separated by a blank line.
        """
        combined = []

        for result in results:
            full_text = contents.get(result.url)
            if full_text:
                combined.append(f"--- Content from {result.url} ---\n{full_text}")
            elif result.summary:
                combined.append(f"--- Summary from {result.url} ---\n{result.title}\n{result.summary}")

        return "\n\n".join(combined)

    def _empty_enrichment(self) -> Dict[str, Any]:
        """Return the result used when no enrichment is possible: all fields empty, confidence "low"."""
        return {
            "release_date": None,
            "architecture_type": None,
            "is_moe": None,
            "num_experts": None,
            "multimodal": None,
            "training_data_sources": [],
            "training_data_composition": None,
            "training_period_start": None,
            "training_period_end": None,
            "evidence_types": [],
            "confidence": "low",
            "sources": [],
            "raw_evidence_snippets": [],
        }
|
| 220 |
+
|
registry/evidence_profile.py
CHANGED
|
@@ -143,4 +143,68 @@ class EvidenceProfileManager:
|
|
| 143 |
explanation += f". Uncertainties: {unc_desc}"
|
| 144 |
|
| 145 |
return explanation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
|
|
|
| 143 |
explanation += f". Uncertainties: {unc_desc}"
|
| 144 |
|
| 145 |
return explanation
|
| 146 |
+
|
| 147 |
+
@staticmethod
def generate_from_web_data(
    web_data: Dict[str, Any],
    existing_evidence: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Generate evidence profile from web-extracted data

    Strength is derived from the number of sources and the reported
    confidence: "S-High" needs at least 3 sources and "high" confidence,
    "S-Medium" needs at least 2 sources or "high" confidence, anything
    else is "S-Low". Missing release date, architecture type and training
    data sources add the uncertainty codes "U5", "U3" and "U2".

    When ``existing_evidence`` is given, evidence types and uncertainties
    are unioned with it and the stronger of the two strengths is kept.

    Args:
        web_data: Extracted data from web search/LLM
        existing_evidence: Existing evidence profile to merge with

    Returns:
        Evidence profile dict with keys ``evidence_types`` (sorted),
        ``strength``, ``uncertainty``, ``generated_at`` and
        ``evidence_version``.
    """
    # "or []" also covers keys that are present but explicitly None.
    evidence_types = set(web_data.get("evidence_types") or [])
    confidence = web_data.get("confidence", "medium")
    sources_count = len(web_data.get("sources") or [])

    # Determine evidence strength based on sources and confidence
    if sources_count >= 3 and confidence == "high":
        strength = "S-High"
    elif sources_count >= 2 or confidence == "high":
        strength = "S-Medium"
    else:
        strength = "S-Low"

    # Identify uncertainty sources for missing information
    uncertainty_sources = []
    if not web_data.get("release_date"):
        uncertainty_sources.append("U5")  # Intentional opacity or missing
    if not web_data.get("architecture_type"):
        uncertainty_sources.append("U3")  # Architecture unclear
    if not web_data.get("training_data_sources"):
        uncertainty_sources.append("U2")  # Data composition unknown

    # If we have existing evidence, merge it
    if existing_evidence:
        evidence_types.update(existing_evidence.get("evidence_types") or [])

        # Use higher strength if available
        existing_strength = existing_evidence.get("strength")
        if existing_strength:
            strength_order = {"S-High": 3, "S-Medium": 2, "S-Low": 1}
            if strength_order.get(existing_strength, 0) > strength_order.get(strength, 0):
                strength = existing_strength

        # Merge uncertainties, keeping a stable order (new codes first,
        # then previously recorded ones) instead of arbitrary set order.
        for code in existing_evidence.get("uncertainty") or []:
            if code not in uncertainty_sources:
                uncertainty_sources.append(code)

    # Create evidence profile; evidence types are sorted so the output is
    # deterministic across runs.
    profile = {
        "evidence_types": sorted(evidence_types),
        "strength": strength,
        "uncertainty": uncertainty_sources,
        "generated_at": datetime.now().isoformat(),
        "evidence_version": "1.0",
    }

    return profile
|
| 210 |
|
registry/ingest_priority_models.py
CHANGED
|
@@ -25,6 +25,7 @@ from registry.collectors.epoch_collector import EpochCollector
|
|
| 25 |
from registry.collectors.hf_collector import HuggingFaceCollector
|
| 26 |
from registry.inference.reconciliation import TokenInferenceReconciler
|
| 27 |
from registry.linkage import create_deal_model_linkages
|
|
|
|
| 28 |
from dotenv import load_dotenv
|
| 29 |
|
| 30 |
# Prisma imports
|
|
@@ -41,12 +42,25 @@ load_dotenv()
|
|
| 41 |
class PriorityModelIngester:
|
| 42 |
"""Programmatic ingester for priority models"""
|
| 43 |
|
| 44 |
-
def __init__(self):
|
| 45 |
self.epoch_collector = EpochCollector()
|
| 46 |
self.hf_collector = HuggingFaceCollector()
|
| 47 |
self.inference_reconciler = TokenInferenceReconciler()
|
| 48 |
self.prisma = None
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
async def connect_db(self):
|
| 51 |
"""Connect to Prisma database"""
|
| 52 |
if not PRISMA_AVAILABLE:
|
|
@@ -226,20 +240,63 @@ class PriorityModelIngester:
|
|
| 226 |
where={"modelId": model_id}
|
| 227 |
)
|
| 228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
if existing:
|
| 230 |
# Update
|
|
|
|
| 231 |
updated = await self.prisma.modelregistry.update(
|
| 232 |
where={"id": existing.id},
|
| 233 |
-
data=
|
| 234 |
-
**{k: v for k, v in model_data.items() if k != "modelId"},
|
| 235 |
-
"updatedAt": datetime.now(),
|
| 236 |
-
}
|
| 237 |
)
|
| 238 |
return updated.id
|
| 239 |
else:
|
| 240 |
# Create
|
|
|
|
| 241 |
created = await self.prisma.modelregistry.create(
|
| 242 |
-
data=
|
| 243 |
)
|
| 244 |
return created.id
|
| 245 |
|
|
@@ -317,24 +374,40 @@ class PriorityModelIngester:
|
|
| 317 |
|
| 318 |
print(f"\nπ¦ Processing: {model_id} ({provider})")
|
| 319 |
|
| 320 |
-
# Step 1: Fetch metadata
|
| 321 |
-
print(f" π Fetching metadata...")
|
| 322 |
epoch_data = await self.fetch_epoch_data(model_id, provider)
|
| 323 |
hf_data = await self.fetch_hf_data(model_id, provider)
|
| 324 |
|
| 325 |
-
# Step 2: Merge metadata
|
| 326 |
model_data = self.merge_metadata(priority_model, epoch_data, hf_data)
|
| 327 |
|
| 328 |
-
# Step 3:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
print(f" π§ Running token inference...")
|
| 330 |
inference_results = await self.run_token_inference(model_data)
|
| 331 |
model_data.update(inference_results)
|
| 332 |
|
| 333 |
-
# Step
|
| 334 |
print(f" πΎ Storing in database...")
|
| 335 |
await self.upsert_model(model_data)
|
| 336 |
|
| 337 |
-
# Step
|
| 338 |
print(f" π Creating deal linkages...")
|
| 339 |
await self.create_linkages(model_id)
|
| 340 |
|
|
|
|
| 25 |
from registry.collectors.hf_collector import HuggingFaceCollector
|
| 26 |
from registry.inference.reconciliation import TokenInferenceReconciler
|
| 27 |
from registry.linkage import create_deal_model_linkages
|
| 28 |
+
from registry.enrichment.comprehensive_enrichment import ComprehensiveModelEnricher
|
| 29 |
from dotenv import load_dotenv
|
| 30 |
|
| 31 |
# Prisma imports
|
|
|
|
| 42 |
class PriorityModelIngester:
|
| 43 |
"""Programmatic ingester for priority models"""
|
| 44 |
|
| 45 |
+
def __init__(self, use_web_enrichment: bool = True):
    """Create the collectors, the token-inference reconciler and, optionally, the web enricher.

    Args:
        use_web_enrichment: When True, try to build a
            ``ComprehensiveModelEnricher`` with web search and LLM
            extraction enabled. If construction fails, a warning is
            printed and ``comprehensive_enricher`` is left as ``None``.
    """
    self.epoch_collector = EpochCollector()
    self.hf_collector = HuggingFaceCollector()
    self.inference_reconciler = TokenInferenceReconciler()
    self.prisma = None

    # Start from "no enricher"; only a successful construction replaces it.
    self.comprehensive_enricher = None
    if not use_web_enrichment:
        return

    try:
        self.comprehensive_enricher = ComprehensiveModelEnricher(
            use_web_search=True,
            use_llm_extraction=True
        )
    except Exception as e:
        print(f"Warning: Comprehensive enricher initialization failed: {e}")
|
| 63 |
+
|
| 64 |
async def connect_db(self):
|
| 65 |
"""Connect to Prisma database"""
|
| 66 |
if not PRISMA_AVAILABLE:
|
|
|
|
| 240 |
where={"modelId": model_id}
|
| 241 |
)
|
| 242 |
|
| 243 |
+
# Prepare data for Prisma (convert field names)
|
| 244 |
+
prisma_data = {}
|
| 245 |
+
|
| 246 |
+
# Map field names from model_data to Prisma schema
|
| 247 |
+
field_mapping = {
|
| 248 |
+
"modelId": "modelId",
|
| 249 |
+
"provider": "provider",
|
| 250 |
+
"family": "family",
|
| 251 |
+
"params": "params",
|
| 252 |
+
"releaseDate": "releaseDate",
|
| 253 |
+
"architectureType": "architectureType",
|
| 254 |
+
"isMoe": "isMoe",
|
| 255 |
+
"numExperts": "numExperts",
|
| 256 |
+
"multimodal": "multimodal",
|
| 257 |
+
"tokensEstMin": "tokensEstMin",
|
| 258 |
+
"tokensEstMax": "tokensEstMax",
|
| 259 |
+
"tokensEstMid": "tokensEstMid",
|
| 260 |
+
"tokensRangeGeneratedAt": "tokensRangeGeneratedAt",
|
| 261 |
+
"evidenceTypes": "evidenceTypes",
|
| 262 |
+
"evidenceStrength": "evidenceStrength",
|
| 263 |
+
"uncertaintySources": "uncertaintySources",
|
| 264 |
+
"evidenceProfileGeneratedAt": "evidenceProfileGeneratedAt",
|
| 265 |
+
"sources": "sources",
|
| 266 |
+
"rawEvidenceSnippets": "rawEvidenceSnippets",
|
| 267 |
+
"compositionEstimates": "compositionEstimates",
|
| 268 |
+
"trainingPeriodStart": "trainingPeriodStart",
|
| 269 |
+
"trainingPeriodEnd": "trainingPeriodEnd",
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
for key, value in model_data.items():
|
| 273 |
+
if key in field_mapping and value is not None:
|
| 274 |
+
prisma_key = field_mapping[key]
|
| 275 |
+
# Convert date strings to datetime if needed
|
| 276 |
+
if prisma_key in ["releaseDate", "tokensRangeGeneratedAt", "evidenceProfileGeneratedAt", "trainingPeriodStart", "trainingPeriodEnd"]:
|
| 277 |
+
if isinstance(value, str):
|
| 278 |
+
try:
|
| 279 |
+
prisma_data[prisma_key] = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
| 280 |
+
except:
|
| 281 |
+
pass
|
| 282 |
+
elif isinstance(value, datetime):
|
| 283 |
+
prisma_data[prisma_key] = value
|
| 284 |
+
else:
|
| 285 |
+
prisma_data[prisma_key] = value
|
| 286 |
+
|
| 287 |
if existing:
|
| 288 |
# Update
|
| 289 |
+
prisma_data["updatedAt"] = datetime.now()
|
| 290 |
updated = await self.prisma.modelregistry.update(
|
| 291 |
where={"id": existing.id},
|
| 292 |
+
data=prisma_data
|
|
|
|
|
|
|
|
|
|
| 293 |
)
|
| 294 |
return updated.id
|
| 295 |
else:
|
| 296 |
# Create
|
| 297 |
+
prisma_data["modelId"] = model_id
|
| 298 |
created = await self.prisma.modelregistry.create(
|
| 299 |
+
data=prisma_data
|
| 300 |
)
|
| 301 |
return created.id
|
| 302 |
|
|
|
|
| 374 |
|
| 375 |
print(f"\nπ¦ Processing: {model_id} ({provider})")
|
| 376 |
|
| 377 |
+
# Step 1: Fetch metadata from Epoch and HF
|
| 378 |
+
print(f" π Fetching metadata from Epoch/HF...")
|
| 379 |
epoch_data = await self.fetch_epoch_data(model_id, provider)
|
| 380 |
hf_data = await self.fetch_hf_data(model_id, provider)
|
| 381 |
|
| 382 |
+
# Step 2: Merge metadata from Epoch/HF
|
| 383 |
model_data = self.merge_metadata(priority_model, epoch_data, hf_data)
|
| 384 |
|
| 385 |
+
# Step 3: Web enrichment (if enabled)
|
| 386 |
+
if self.comprehensive_enricher:
|
| 387 |
+
print(f" π Running web enrichment...")
|
| 388 |
+
try:
|
| 389 |
+
# Use comprehensive enricher which includes web search
|
| 390 |
+
enriched = await self.comprehensive_enricher.enrich_model(
|
| 391 |
+
model_id=model_id,
|
| 392 |
+
provider=provider,
|
| 393 |
+
family=priority_model.get("family"),
|
| 394 |
+
existing_data=model_data
|
| 395 |
+
)
|
| 396 |
+
# Merge web enrichment results
|
| 397 |
+
model_data.update(enriched)
|
| 398 |
+
except Exception as e:
|
| 399 |
+
print(f" Warning: Web enrichment error: {e}")
|
| 400 |
+
|
| 401 |
+
# Step 4: Run token inference
|
| 402 |
print(f" π§ Running token inference...")
|
| 403 |
inference_results = await self.run_token_inference(model_data)
|
| 404 |
model_data.update(inference_results)
|
| 405 |
|
| 406 |
+
# Step 5: Store in database
|
| 407 |
print(f" πΎ Storing in database...")
|
| 408 |
await self.upsert_model(model_data)
|
| 409 |
|
| 410 |
+
# Step 6: Create linkages
|
| 411 |
print(f" π Creating deal linkages...")
|
| 412 |
await self.create_linkages(model_id)
|
| 413 |
|
registry/requirements.txt
CHANGED
|
@@ -4,3 +4,6 @@ huggingface-hub>=0.16.0
|
|
| 4 |
pandas>=2.0.0
|
| 5 |
python-dotenv>=1.0.0
|
| 6 |
prisma>=0.11.0
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
pandas>=2.0.0
|
| 5 |
python-dotenv>=1.0.0
|
| 6 |
prisma>=0.11.0
|
| 7 |
+
openai>=1.0.0
|
| 8 |
+
anthropic>=0.18.0
|
| 9 |
+
requests>=2.31.0
|