# microbe-model/tests/test_bacdive.py
# Miyu Horiuchi
# Rewrite BacDive client for v2 public API (no auth required)
# 6c30d74
"""Test BacDive phenotype extraction against a fixture of the real v2 schema."""
from __future__ import annotations
from microbe_model.data.bacdive import _derive_optimum, extract_phenotypes
# Fixture: a trimmed-down copy of a real /v2/fetch/24493 response
# (Phaeobacter gallaeciensis BS 107). Only the sections the tests below
# read are kept.
SAMPLE_RECORD = {
    # Record identity plus taxon ids at two different matching levels.
    "General": {
        "BacDive-ID": 24493,
        "NCBI tax id": [
            {"NCBI tax id": 1423144, "Matching level": "strain"},
            {"NCBI tax id": 60890, "Matching level": "species"},
        ],
    },
    # Taxonomy: a nested LPSN lineage and top-level genus/species names.
    "Name and taxonomic classification": {
        "LPSN": {
            "domain": "Bacteria",
            "phylum": "Pseudomonadota",
            "class": "Alphaproteobacteria",
            "order": "Rhodobacterales",
            "family": "Roseobacteraceae",
            "genus": "Phaeobacter",
            "species": "Phaeobacter gallaeciensis",
        },
        "genus": "Phaeobacter",
        "species": "Phaeobacter gallaeciensis",
    },
    # Growth temperatures: two single values, one range, and one negative
    # (no-growth) observation. All temperatures are strings.
    "Culture and growth conditions": {
        "culture temp": [
            {"growth": "positive", "type": "growth", "temperature": "25"},
            {"growth": "positive", "type": "growth", "temperature": "22"},
            {"growth": "positive", "type": "growth", "temperature": "5-30"},
            {"growth": "negative", "type": "growth", "temperature": "37"},
        ],
    },
    "Physiology and metabolism": {
        "oxygen tolerance": [{"oxygen tolerance": "obligate aerobe"}],
    },
    # Two assemblies; the complete one is listed first.
    "Sequence information": {
        "Genome sequences": [
            {"INSDC accession": "GCA_000511385", "assembly level": "complete"},
            {"INSDC accession": "GCA_000819625", "assembly level": "contig"},
        ],
    },
}
def test_extract_phenotypes_real_schema() -> None:
    """Run ``extract_phenotypes`` on ``SAMPLE_RECORD`` and check each field."""
    phenotypes = extract_phenotypes(SAMPLE_RECORD)

    # Checked in insertion order, so the first mismatch reported is the same
    # one a sequence of individual asserts would hit first.
    expected_fields = {
        "bacdive_id": 24493,
        "species": "Phaeobacter gallaeciensis",
        "genus": "Phaeobacter",
        "family": "Roseobacteraceae",
        "ncbi_taxon_id": 1423144,
        "genome_accession": "GCA_000511385",  # first listed
        "oxygen_requirement": "obligate aerobe",
        # Three positive-growth temps: 25, 22, midpoint(5-30)=17.5 → median = 22
        "optimal_temperature_c": 22.0,
    }
    for field_name, expected_value in expected_fields.items():
        assert phenotypes[field_name] == expected_value, field_name
def test_derive_optimum_prefers_explicit_optimum() -> None:
    """An entry typed ``optimum`` is returned even among positive-growth entries."""
    growth_at_30 = {"type": "growth", "growth": "positive", "temperature": "30"}
    explicit_optimum = {"type": "optimum", "temperature": "37"}
    growth_at_25 = {"type": "growth", "growth": "positive", "temperature": "25"}

    # The optimum entry sits between the growth entries on purpose: its
    # position in the list should not matter.
    derived = _derive_optimum(
        [growth_at_30, explicit_optimum, growth_at_25],
        "temperature",
    )

    assert derived == 37.0
def test_derive_optimum_falls_back_to_growth_median() -> None:
    """With no ``optimum`` entry, the median of positive-growth values is used."""
    positive_20 = {"type": "growth", "growth": "positive", "temperature": "20"}
    positive_30 = {"type": "growth", "growth": "positive", "temperature": "30"}
    # Negative growth: expected to be ignored when computing the median.
    negative_45 = {"type": "growth", "growth": "negative", "temperature": "45"}

    derived = _derive_optimum(
        [positive_20, positive_30, negative_45],
        "temperature",
    )

    # median(20, 30) == 25
    assert derived == 25.0
def test_extract_phenotypes_handles_missing_fields() -> None:
    """An empty record still yields the checked keys, each mapped to ``None``."""
    phenotypes = extract_phenotypes({})

    keys_expected_none = (
        "bacdive_id",
        "genome_accession",
        "optimal_temperature_c",
    )
    for key in keys_expected_none:
        assert phenotypes[key] is None, key