import React from 'react';
import { THEME, tempColor, pHColor, saltColor, O2_COLOR } from '../theme.js';
import { MediaConfBar, OxygenConfArc, IntervalBar, SourceBadge } from './Primitives.jsx';

/**
 * Per-target accuracy summaries displayed by the Accuracy component.
 *
 * Each entry carries:
 *   key     - short target identifier ('T', 'pH', 'O2', 'salt'); Accuracy
 *             compares it against 'O2' to pick the validation caption
 *   label   - human-readable target name
 *   metric  - name of the reported metric ('MAE' or 'F1')
 *   value   - metric value, stored as an already-formatted string
 *   unit    - unit suffix shown after the value ('' when there is none)
 *   color   - color taken from the theme module; the helper calls run once,
 *             when this module is evaluated
 *   verdict - one-sentence assessment; Accuracy wraps it in double quotes
 *   detail  - supporting note about training data / validation
 */
const TARGETS = [
  {
    key: 'T',
    label: 'Temperature optimum',
    metric: 'MAE',
    value: '3.28',
    unit: '°C',
    color: tempColor(45),
    verdict: "Useful — labs incubate in 5°C steps; you'd usually pick the right shelf.",
    detail: 'Trained on 17,007 BacDive strains. Cross-validation 5-fold GroupKFold by family.'
  },
  {
    key: 'pH',
    label: 'pH optimum',
    metric: 'MAE',
    value: '0.52',
    unit: '',
    color: pHColor(7),
    verdict: 'Marginal — distinguishes acidic / neutral / alkaline, not finer.',
    detail: 'Trained on 4,652 BacDive strains. Quantile regression for 80% prediction interval.'
  },
  {
    key: 'O2',
    label: 'Oxygen requirement',
    metric: 'F1',
    value: '0.94',
    unit: '',
    color: O2_COLOR,
    verdict: 'Strong on fold 0 with LoRA; still needs folds 1-4 before publication-grade validation.',
    detail: 'Hybrid oxygen uses LoRA ESM-2 fold 0 when available, with tabular prediction as the deploy fallback.'
  },
  {
    key: 'salt',
    label: 'Salt tolerance',
    metric: 'MAE',
    value: '2.51',
    unit: '%',
    color: saltColor(3),
    verdict: 'Decent — separates freshwater / marine / halotolerant.',
    detail: 'Trained on 4,793 BacDive strains.'
  },
];

/**
 * Accuracy panel: a summary verdict, one entry per TARGETS item, and an
 * explanation of how confidence is calculated.
 *
 * For every target it shows the label, a validation caption ('LoRA fold 0'
 * for the entry whose key is 'O2', '5-fold GroupKFold by family' for all
 * others), the metric name with its value and unit, the verdict in double
 * quotes, and the detail note.
 *
 * Takes no props.
 */
export default function Accuracy() {
  return (
The verdict
Hybrid v2 keeps tabular models for temperature, pH, salt, and media; oxygen uses LoRA when the checkpoint-backed predictions are available.
{TARGETS.map((t) => (
{t.label} {t.key === 'O2' ? 'LoRA fold 0' : '5-fold GroupKFold by family'}
{t.metric} {t.value} {t.unit}
"{t.verdict}"
{t.detail}
))}
How confidence is calculated
Tabular phenotypes were trained from 46,029 BacDive-derived strain rows; LoRA oxygen fold 0 used 32,375 training rows and 8,094 validation rows. The uncultured catalog is 5,000 held-out GTDB genomes scored against 24 DSMZ media. Features: 353 handcrafted genome statistics — GC, codon usage, tetranucleotide frequencies, amino-acid composition. XGBoost classifiers handle media and tabular phenotypes; quantile regression XGBoost provides prediction intervals.
  );
}

/**
 * Legend text explaining one kind of confidence display.
 *
 * @param {object} props
 * @param {string} props.kind - 'media' selects the media-confidence legend,
 *   'oxygen' selects the oxygen-confidence legend; any other value
 *   (including undefined) falls through to the prediction-interval legend.
 */
function Legend({ kind }) {
  if (kind === 'media') {
    return (
Media confidence
Per-medium binary classifier predict_proba. Not perfectly calibrated — BacDive only has positive examples.
    );
  }
  if (kind === 'oxygen') {
    return (
Oxygen confidence
LoRA confidence is the max softmax probability across four oxygen classes. Tabular remains the fallback when hybrid artifacts are absent.
    );
  }
  // Default branch: no explicit kind check, so this is what every
  // unrecognized kind renders.
  return (
Prediction interval
Quantile regression at α=0.1 / 0.9 → 80% PI for T, pH, salt. Wide interval = model uncertain.
  );
}