import React from 'react';
import { THEME, tempColor, pHColor, saltColor, O2_COLOR } from '../theme.js';
import { MediaConfBar, OxygenConfArc, IntervalBar, SourceBadge } from './Primitives.jsx';
/**
 * Headline accuracy figures, one entry per predicted phenotype.
 *
 * Each entry is rendered as a card by `Accuracy`:
 *   - key:     unique id; used as the React key and to pick the validation label
 *   - label:   card title
 *   - metric:  name of the score shown next to the value (e.g. MAE, F1)
 *   - value:   pre-formatted score string
 *   - unit:    unit suffix ('' when the score is unitless)
 *   - color:   color applied to the score value
 *   - verdict: one-line plain-language judgement, shown in quotes
 *   - detail:  supporting note about training data / validation
 */
const TARGETS = [
  {
    key: 'T',
    label: 'Temperature optimum',
    metric: 'MAE',
    value: '3.28',
    unit: '°C',
    color: tempColor(45),
    verdict: "Useful — labs incubate in 5°C steps; you'd usually pick the right shelf.",
    detail: 'Trained on 17,007 BacDive strains. Cross-validation 5-fold GroupKFold by family.',
  },
  {
    key: 'pH',
    label: 'pH optimum',
    metric: 'MAE',
    value: '0.52',
    unit: '',
    color: pHColor(7),
    verdict: 'Marginal — distinguishes acidic / neutral / alkaline, not finer.',
    detail: 'Trained on 4,652 BacDive strains. Quantile regression for 80% prediction interval.',
  },
  {
    key: 'O2',
    label: 'Oxygen requirement',
    metric: 'F1',
    value: '0.94',
    unit: '',
    color: O2_COLOR,
    verdict: 'Strong on fold 0 with LoRA; still needs folds 1-4 before publication-grade validation.',
    detail: 'Hybrid oxygen uses LoRA ESM-2 fold 0 when available, with tabular prediction as the deploy fallback.',
  },
  {
    key: 'salt',
    label: 'Salt tolerance',
    metric: 'MAE',
    value: '2.51',
    unit: '%',
    color: saltColor(3),
    verdict: 'Decent — separates freshwater / marine / halotolerant.',
    detail: 'Trained on 4,793 BacDive strains.',
  },
];
export default function Accuracy() {
return (
<div style={{ flex: 1, overflow: 'auto', background: THEME.paper, padding: '24px 28px' }}>
<div style={{ marginBottom: 24, padding: '14px 16px', background: '#ede4cd', border: `1px solid ${THEME.rule}` }}>
<div style={{ font: `500 11px ${THEME.mono}`, color: THEME.accent, letterSpacing: '0.05em', textTransform: 'uppercase', marginBottom: 4 }}>The verdict</div>
<div style={{ font: `400 14px ${THEME.serif}`, color: THEME.ink, fontStyle: 'italic' }}>
Hybrid v2 keeps tabular models for temperature, pH, salt, and media; oxygen uses LoRA when the checkpoint-backed predictions are available.
</div>
</div>
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 14, marginBottom: 24 }}>
{TARGETS.map((t) => (
<div key={t.key} style={{ border: `1px solid ${THEME.rule}`, padding: '16px 18px', background: THEME.paper }}>
<div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'baseline', marginBottom: 8 }}>
<span style={{ font: `500 12px ${THEME.font}`, color: THEME.ink }}>{t.label}</span>
<span style={{ font: `400 11px ${THEME.mono}`, color: THEME.inkFaint }}>{t.key === 'O2' ? 'LoRA fold 0' : '5-fold GroupKFold by family'}</span>
</div>
<div style={{ display: 'flex', alignItems: 'baseline', gap: 8, marginBottom: 8 }}>
<span style={{ font: `400 10px ${THEME.mono}`, color: THEME.inkFaint, textTransform: 'uppercase' }}>{t.metric}</span>
<span style={{ font: `500 32px ${THEME.serif}`, color: t.color, fontVariantNumeric: 'tabular-nums', lineHeight: 1 }}>{t.value}</span>
<span style={{ font: `400 12px ${THEME.mono}`, color: THEME.inkSoft }}>{t.unit}</span>
</div>
<div style={{ font: `500 13px ${THEME.serif}`, color: THEME.ink, fontStyle: 'italic', marginBottom: 6 }}>"{t.verdict}"</div>
<div style={{ font: `400 12px ${THEME.font}`, color: THEME.inkSoft, lineHeight: 1.5 }}>{t.detail}</div>
</div>
))}
</div>
<div style={{ borderTop: `1px solid ${THEME.rule}`, paddingTop: 18 }}>
<div style={{ font: `500 11px ${THEME.mono}`, color: THEME.inkSoft, letterSpacing: '0.08em', textTransform: 'uppercase', marginBottom: 8, display: 'flex', alignItems: 'center', gap: 10 }}>
<span>How confidence is calculated</span>
<span style={{ flex: 1, height: 1, background: THEME.rule }} />
</div>
<div style={{ display: 'grid', gridTemplateColumns: 'repeat(3, 1fr)', gap: 12 }}>
<Legend kind="media" />
<Legend kind="oxygen" />
<Legend kind="interval" />
</div>
</div>
<div style={{ marginTop: 24, font: `400 12px ${THEME.font}`, color: THEME.inkSoft, lineHeight: 1.6 }}>
Tabular phenotypes were trained from <span style={{ color: THEME.ink, fontWeight: 500 }}>46,029</span> BacDive-derived strain rows;
LoRA oxygen fold 0 used <span style={{ color: THEME.ink, fontWeight: 500 }}>32,375</span> training rows and <span style={{ color: THEME.ink, fontWeight: 500 }}>8,094</span> validation rows.
The uncultured catalog is <span style={{ color: THEME.ink, fontWeight: 500 }}>5,000</span> held-out
GTDB genomes scored against <span style={{ color: THEME.ink, fontWeight: 500 }}>24</span> DSMZ media.
Features: 353 handcrafted genome statistics — GC, codon usage, tetranucleotide frequencies, amino-acid composition.
XGBoost classifiers handle media and tabular phenotypes; quantile regression XGBoost provides prediction intervals.
</div>
</div>
);
}
function Legend({ kind }) {
if (kind === 'media') {
return (
<div style={{ border: `1px solid ${THEME.rule}`, padding: '12px 14px', background: THEME.paper }}>
<MediaConfBar value={0.72} />
<div style={{ font: `500 12px ${THEME.font}`, color: THEME.ink, marginTop: 8 }}>Media confidence</div>
<div style={{ font: `400 11.5px ${THEME.font}`, color: THEME.inkSoft, lineHeight: 1.5 }}>
Per-medium binary classifier <span style={{ font: `400 11px ${THEME.mono}` }}>predict_proba</span>. Not perfectly calibrated — BacDive only has positive examples.
</div>
</div>
);
}
if (kind === 'oxygen') {
return (
<div style={{ border: `1px solid ${THEME.rule}`, padding: '12px 14px', background: THEME.paper }}>
<div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
<OxygenConfArc value={0.72} size={36} />
<SourceBadge source="lora" />
</div>
<div style={{ font: `500 12px ${THEME.font}`, color: THEME.ink, marginTop: 8 }}>Oxygen confidence</div>
<div style={{ font: `400 11.5px ${THEME.font}`, color: THEME.inkSoft, lineHeight: 1.5 }}>
LoRA confidence is the max softmax probability across four oxygen classes. Tabular remains the fallback when hybrid artifacts are absent.
</div>
</div>
);
}
return (
<div style={{ border: `1px solid ${THEME.rule}`, padding: '12px 14px', background: THEME.paper }}>
<IntervalBar value={37} lo={32} hi={43} scaleMin={0} scaleMax={80} color={tempColor(37)} unit="°C" height={6} label />
<div style={{ font: `500 12px ${THEME.font}`, color: THEME.ink, marginTop: 6 }}>Prediction interval</div>
<div style={{ font: `400 11.5px ${THEME.font}`, color: THEME.inkSoft, lineHeight: 1.5 }}>
Quantile regression at α=0.1 / 0.9 → 80% PI for T, pH, salt. Wide interval = model uncertain.
</div>
</div>
);
}
|