| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
/** Allowed values for `KnowledgeBase.type`. */
export type KBType = "embedded" | "api" | "file" | "vector";
|
|
/**
 * Catalog metadata describing one knowledge base. The entries themselves are
 * stored separately and looked up by `id`.
 */
export interface KnowledgeBase {
  /** Unique identifier; `queryKB` and `getKnowledgeBase` look bases up by it. */
  id: string;
  /** Short name of the knowledge base. */
  name: string;
  /** Summary of the topics the knowledge base covers. */
  description: string;
  /** One of the `KBType` values. */
  type: KBType;
  /** Number of entries in the knowledge base. */
  entryCount: number;
  /** When true, `getDefaultKBIds` includes this knowledge base's id. */
  defaultEnabled: boolean;
  /** Icon string; the registry in this file uses a single emoji per base. */
  icon: string;
  /** Free-form topic labels. */
  tags: string[];
}
|
|
/** One knowledge-base article, optionally annotated with a search score. */
export interface KBEntry {
  /** Identifier of the entry. */
  id: string;
  /** Heading of the entry; searched together with `content`. */
  title: string;
  /** Body text of the entry. */
  content: string;
  /** Origin of the content; the data in this file uses document names or URLs. */
  source?: string;
  /**
   * Fraction of query words found in the entry, as assigned by `queryKB`.
   * Not set on the stored entries.
   */
  score?: number;
}
|
|
/** Outcome of searching a single knowledge base. */
export interface KBQueryResult {
  /** Metadata of the knowledge base that was searched. */
  kb: KnowledgeBase;
  /** Matching entries, highest score first. */
  entries: KBEntry[];
  /** Time spent searching, in whole milliseconds. */
  queryMs: number;
}
|
|
| |
|
|
/** Entries served under the `kb-antibody-design` knowledge base. */
const ANTIBODY_DESIGN_KB: KBEntry[] = [
  {
    id: "abd-001",
    title: "De novo antibody design pipeline",
    content: "Standard pipeline: backbone generation (RFdiffusion) → sequence design (ProteinMPNN) → structure prediction filter (RF2/AF2, iPAE < 10) → scoring consensus (Chai-1 + AF2 + Protenix, 3-model agreement) → developability filter (Agmata aggregation < 0.3, humanness > 0.8). Typical yield: 5-15% of generated backbones pass all filters.",
    source: "Proteinea internal SOP v3.2",
  },
  {
    id: "abd-002",
    title: "Format selection guidelines",
    content: "VHH (nanobody, 15 kDa): best for tissue penetration, no Fc effector functions, ideal for imaging and intracellular targets. scFv (27 kDa): similar penetration, can be reformatted to full IgG. mAb IgG1: full Fc, enables ADCC/CDC, standard for oncology. mAb IgG4: Fc-silent, preferred for blocking/neutralization without cell killing. Bispecific: dual-target engagement, complex manufacturing.",
    source: "Proteinea format decision tree",
  },
  {
    id: "abd-003",
    title: "Affinity maturation strategies",
    content: "In silico maturation: CDR walking with ProteinMPNN (fix framework, randomize CDR positions). Typical improvement: 3-10x affinity gain per round. Library size: 1000-5000 variants per CDR. Scoring: iPAE + binding energy (Rosetta ΔΔG). Key risk: aggregation propensity increases with higher affinity — always co-optimize developability.",
    source: "Proteinea maturation protocol",
  },
  {
    id: "abd-004",
    title: "Developability assessment checklist",
    content: "Critical metrics: aggregation propensity (Agmata score < 0.3), thermostability (Tm > 65°C for VHH, > 70°C for IgG), expression yield (> 0.5 g/L CHO for IgG, > 5 mg/L E.coli for VHH), humanness score (OASis > 0.8), polyreactivity (ELISA panel negative), viscosity (< 20 cP at 150 mg/mL for SC formulation).",
    source: "Proteinea developability SOP",
  },
  {
    id: "abd-005",
    title: "Epitope mapping approaches",
    content: "Computational: docking with ClusPro/HADDOCK, interface residue prediction with Chai-1 iPAE heatmaps. Experimental: HDX-MS (gold standard), alanine scanning, peptide arrays. For therapeutic differentiation: map against known antibody epitopes (trastuzumab epitope on HER2 domain IV, cetuximab on EGFR domain III) to identify novel binding sites.",
    source: "Proteinea epitope mapping guide",
  },
];
|
|
/** Entries served under the `kb-benchmarks` knowledge base. */
const BENCHMARK_KB: KBEntry[] = [
  {
    id: "bkb-001",
    title: "Benchmark interpretation guide",
    content: "Hit rate is the fraction of designs that bind the target (SPR/BLI KD < 100 nM). Affinity is reported as the best single binder. Target coverage is the fraction of tested targets with at least one hit. Key caveat: benchmarks test different targets — JAM-2 covers 16 antigens including GPCRs, Chai-2 only 4 soluble. Direct comparison requires matched-target analysis.",
    source: "Proteinea benchmark methodology",
  },
  {
    id: "bkb-002",
    title: "JAM-2 benchmark deep dive",
    content: "Joint Antibody-antigen Model 2 (Dec 2025). 748 designs across 16 antigens. VHH hit rate 39%, mAb 18%. Best affinity: 170 pM (HER2 VHH). Strengths: broad target coverage, joint VH/VL prediction. Weaknesses: not yet open-source, developability pass rate only 57%. The current state-of-the-art for de novo antibody design by hit rate.",
    source: "https://arxiv.org/abs/2512.20605",
  },
  {
    id: "bkb-003",
    title: "When to use which benchmark",
    content: "For VHH-focused programs: JAM-2 (highest hit rate at 39%), RFantibody (open-source, 15%). For developability-critical programs: Chai-2 (86% pass rate vs JAM-2's 57%). For mAb programs: JAM-2 is the only benchmark with mAb-format results. For rapid iteration: DiffAb/dyMEAN have open code but lower hit rates (6-8%).",
    source: "Proteinea benchmark selection guide",
  },
];
|
|
/** Entries served under the `kb-target-biology` knowledge base. */
const TARGET_BIOLOGY_KB: KBEntry[] = [
  {
    id: "tbk-001",
    title: "Target class considerations",
    content: "Soluble targets (HER2 ECD, TNF-alpha, IL-6): well-suited for all formats, crystal structures widely available. Membrane targets (PD-L1, CD20): require careful epitope selection, VHH preferred for deep cleft access. GPCRs (CXCR7, CXCR4): hardest class — limited epitopes, dynamic conformations, VHH essential for accessing extracellular loops. Hit rates for GPCRs are typically 3-5x lower than soluble targets.",
    source: "Proteinea target assessment framework",
  },
  {
    id: "tbk-002",
    title: "Competitive landscape awareness",
    content: "Before designing: check existing approved antibodies (trastuzumab/HER2, pembrolizumab/PD-1, rituximab/CD20) and their epitopes. Differentiation strategies: novel epitope, higher affinity, better developability, bispecific engagement, enhanced effector function (afucosylation for ADCC). Key databases: IMGT, Thera-SAbDab, DrugBank.",
    source: "Proteinea BD intelligence",
  },
];
|
|
/** Entries served under the `kb-model-registry` knowledge base. */
const MODEL_REGISTRY_KB: KBEntry[] = [
  {
    id: "mrk-001",
    title: "Model selection decision tree",
    content: "Step 1: Choose backbone generator (RFdiffusion for de novo, ABodyBuilder2 for humanization). Step 2: Choose sequence designer (ProteinMPNN, always). Step 3: Choose scoring stack — for speed: ESMFold (CPU, seconds); for accuracy: AF2 + Chai-1 consensus (GPU, minutes); for production: 3-model consensus (AF2 + Chai-1 + Protenix). Step 4: Choose developability predictor (AbLang for humanness, Agmata for aggregation).",
    source: "Proteinea model selection SOP",
  },
  {
    id: "mrk-002",
    title: "GPU requirements and scaling",
    content: "RFdiffusion: 16GB VRAM, ~30s per backbone. ProteinMPNN: 8GB, ~5s per sequence. AF2: 24GB for full model, 16GB for AF2-multimer-reduced. Chai-1: 24GB, ~2 min per complex. For 50-design campaign: ~45 min on single A100. For 500-design campaign: recommend 4x A100 or cloud batch (RunPod, ~$2/hr per A100).",
    source: "Proteinea infrastructure guide",
  },
];
|
|
| |
|
|
/**
 * Catalog of the knowledge bases defined in this module.
 *
 * Each `entryCount` is read from the length of the matching content array
 * when the module is evaluated.
 */
export const KNOWLEDGE_BASES: KnowledgeBase[] = [
  {
    id: "kb-antibody-design",
    name: "Antibody Design",
    description: "Design pipeline SOPs, format selection, affinity maturation, developability assessment, epitope mapping. Proteinea's internal best practices.",
    type: "embedded",
    entryCount: ANTIBODY_DESIGN_KB.length,
    defaultEnabled: true,
    icon: "🧬",
    tags: ["design", "pipeline", "developability"],
  },
  {
    id: "kb-benchmarks",
    name: "Benchmark Catalog",
    description: "Interpretation guides, deep dives on JAM-2/Chai-2/RFantibody, benchmark selection criteria for different program types.",
    type: "embedded",
    entryCount: BENCHMARK_KB.length,
    defaultEnabled: true,
    icon: "📊",
    tags: ["benchmarks", "evaluation", "comparison"],
  },
  {
    id: "kb-target-biology",
    name: "Target Biology",
    description: "Target class considerations, competitive landscape awareness, epitope differentiation strategies.",
    type: "embedded",
    entryCount: TARGET_BIOLOGY_KB.length,
    defaultEnabled: true,
    icon: "🎯",
    tags: ["targets", "biology", "competitive"],
  },
  {
    // The only base in this catalog that is not enabled by default.
    id: "kb-model-registry",
    name: "Model Registry",
    description: "Model selection decision trees, GPU requirements, scaling guidance for production campaigns.",
    type: "embedded",
    entryCount: MODEL_REGISTRY_KB.length,
    defaultEnabled: false,
    icon: "🤖",
    tags: ["models", "infrastructure", "gpu"],
  },
];
|
|
| |
|
|
/**
 * Maps a knowledge-base id to the entries stored for it. The keys are the
 * same ids used in `KNOWLEDGE_BASES`; `queryKB` reads its search corpus here.
 */
const KB_CONTENT: Record<string, KBEntry[]> = {
  "kb-antibody-design": ANTIBODY_DESIGN_KB,
  "kb-benchmarks": BENCHMARK_KB,
  "kb-target-biology": TARGET_BIOLOGY_KB,
  "kb-model-registry": MODEL_REGISTRY_KB,
};
|
|
| |
| |
| |
| |
| |
/**
 * Runs a keyword search over one knowledge base.
 *
 * The query is lower-cased and split on whitespace. Punctuation is trimmed
 * from both ends of every word (so "JAM-2?" still matches "JAM-2") and a
 * repeated word is counted once. An entry's score is the fraction of the
 * remaining words that occur as substrings of its lower-cased title and
 * content. Entries that match no word are dropped; the rest are returned
 * best-first, with ties kept in the order the entries are stored.
 *
 * @param kbId  Id of a knowledge base listed in `KNOWLEDGE_BASES`.
 * @param query Free-text query.
 * @param limit Maximum number of entries to return (default 3). Fractional
 *              values are truncated; negative or NaN values yield no entries.
 * @returns The knowledge base, scored copies of the matching entries and the
 *          elapsed search time in whole milliseconds, or `null` when `kbId`
 *          is not listed or has no stored content.
 */
export function queryKB(kbId: string, query: string, limit = 3): KBQueryResult | null {
  const kb = KNOWLEDGE_BASES.find((k) => k.id === kbId);
  if (!kb) return null;

  const entries = KB_CONTENT[kbId];
  if (!entries) return null;

  const t0 = performance.now();

  // A negative end index makes slice() count from the end of the array, which
  // would return "all but the last |limit|" entries instead of none.
  const maxEntries = Number.isNaN(limit) ? 0 : Math.max(0, Math.trunc(limit));

  // Anything that is not a letter or digit is removed from word edges only;
  // inner punctuation ("jam-2", "2/hr") is part of the word and is kept.
  const edgePunctuation = /^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu;
  const words = [
    ...new Set(
      query
        .toLowerCase()
        .split(/\s+/)
        .map((word) => word.replace(edgePunctuation, ""))
        .filter(Boolean),
    ),
  ];

  const matches: Array<KBEntry & { score: number }> = [];
  if (words.length > 0) {
    for (const entry of entries) {
      const text = `${entry.title} ${entry.content}`.toLowerCase();
      const hits = words.filter((word) => text.includes(word)).length;
      // Copy the entry so the stored data never carries a score.
      if (hits > 0) matches.push({ ...entry, score: hits / words.length });
    }
  }

  // Array.prototype.sort is stable, so equal scores keep their stored order.
  matches.sort((a, b) => b.score - a.score);
  const top = matches.slice(0, maxEntries);
  const queryMs = Math.round(performance.now() - t0);

  return { kb, entries: top, queryMs };
}
|
|
| |
| |
| |
| |
/**
 * Searches several knowledge bases with the same query.
 *
 * Every id is passed to `queryKB` in turn. Ids for which `queryKB` returns
 * `null`, and knowledge bases without a single matching entry, are left out,
 * so the result can be shorter than `kbIds`. Results follow the order of
 * `kbIds`.
 *
 * @param kbIds Ids of the knowledge bases to search.
 * @param query Free-text query forwarded unchanged to `queryKB`.
 * @param limit Maximum entries per knowledge base (default 3); it is not a
 *              cap on the combined result.
 * @returns One result for each knowledge base that produced a match.
 */
export function queryMultipleKBs(
  kbIds: string[],
  query: string,
  limit = 3,
): KBQueryResult[] {
  const results: KBQueryResult[] = [];
  for (const id of kbIds) {
    const result = queryKB(id, query, limit);
    if (result !== null && result.entries.length > 0) {
      results.push(result);
    }
  }
  return results;
}
|
|
/**
 * Returns every knowledge base in the catalog.
 *
 * The returned array is the module's `KNOWLEDGE_BASES` array itself, not a
 * copy, so mutating it changes the catalog.
 */
export function listKnowledgeBases(): KnowledgeBase[] {
  return KNOWLEDGE_BASES;
}
|
|
/**
 * Looks up a knowledge base by id.
 *
 * @param id Identifier to search for in `KNOWLEDGE_BASES`.
 * @returns The first knowledge base whose `id` equals `id`, or `null` when
 *          there is none.
 */
export function getKnowledgeBase(id: string): KnowledgeBase | null {
  const match = KNOWLEDGE_BASES.find((candidate) => candidate.id === id);
  return match ?? null;
}
|
|
/**
 * Lists the ids of the knowledge bases whose `defaultEnabled` flag is true,
 * in catalog order.
 */
export function getDefaultKBIds(): string[] {
  const ids: string[] = [];
  for (const kb of KNOWLEDGE_BASES) {
    if (kb.defaultEnabled) ids.push(kb.id);
  }
  return ids;
}
|
|