Spaces:
Running
Running
File size: 3,217 Bytes
"""Test BacDive phenotype extraction against a fixture of the real v2 schema."""
from __future__ import annotations
from microbe_model.data.bacdive import _derive_optimum, extract_phenotypes
# Trimmed-down version of a real /v2/fetch/24493 response (Phaeobacter gallaeciensis BS 107).
SAMPLE_RECORD = {
    # Record identity: the BacDive ID plus one NCBI taxon ID per matching level.
    "General": {
        "BacDive-ID": 24493,
        "NCBI tax id": [
            {"NCBI tax id": 1423144, "Matching level": "strain"},
            {"NCBI tax id": 60890, "Matching level": "species"},
        ],
    },
    # Taxonomy: the full lineage sits under "LPSN"; genus and species are
    # repeated one level up.
    "Name and taxonomic classification": {
        "LPSN": {
            "domain": "Bacteria",
            "phylum": "Pseudomonadota",
            "class": "Alphaproteobacteria",
            "order": "Rhodobacterales",
            "family": "Roseobacteraceae",
            "genus": "Phaeobacter",
            "species": "Phaeobacter gallaeciensis",
        },
        "genus": "Phaeobacter",
        "species": "Phaeobacter gallaeciensis",
    },
    # Growth observations: two single temperatures, one range ("5-30") and one
    # negative result. Temperatures are stored as strings.
    "Culture and growth conditions": {
        "culture temp": [
            {"growth": "positive", "type": "growth", "temperature": "25"},
            {"growth": "positive", "type": "growth", "temperature": "22"},
            {"growth": "positive", "type": "growth", "temperature": "5-30"},
            {"growth": "negative", "type": "growth", "temperature": "37"},
        ],
    },
    "Physiology and metabolism": {
        "oxygen tolerance": [{"oxygen tolerance": "obligate aerobe"}],
    },
    # Two assemblies; the tests below expect the first listed accession.
    "Sequence information": {
        "Genome sequences": [
            {"INSDC accession": "GCA_000511385", "assembly level": "complete"},
            {"INSDC accession": "GCA_000819625", "assembly level": "contig"},
        ],
    },
}
def test_extract_phenotypes_real_schema() -> None:
    """Check each field extracted from the trimmed real-schema fixture."""
    phenotypes = extract_phenotypes(SAMPLE_RECORD)

    # Expected value per output key, checked in the order listed.
    expected_fields = {
        "bacdive_id": 24493,
        "species": "Phaeobacter gallaeciensis",
        "genus": "Phaeobacter",
        "family": "Roseobacteraceae",
        "ncbi_taxon_id": 1423144,
        "genome_accession": "GCA_000511385",  # first listed
        "oxygen_requirement": "obligate aerobe",
        # Three positive-growth temps: 25, 22, midpoint(5-30)=17.5 → median = 22
        "optimal_temperature_c": 22.0,
    }
    for field, expected in expected_fields.items():
        assert phenotypes[field] == expected
def test_derive_optimum_prefers_explicit_optimum() -> None:
    """An entry of type "optimum" is expected to win over positive-growth entries."""
    growth_at_30 = {"type": "growth", "growth": "positive", "temperature": "30"}
    explicit_optimum = {"type": "optimum", "temperature": "37"}
    growth_at_25 = {"type": "growth", "growth": "positive", "temperature": "25"}

    # The optimum entry is deliberately placed between the two growth entries.
    derived = _derive_optimum(
        [growth_at_30, explicit_optimum, growth_at_25], "temperature"
    )

    assert derived == 37.0
def test_derive_optimum_falls_back_to_growth_median() -> None:
    """With no "optimum" entry, the expected result is the median of positive-growth values."""
    positive_growth = [
        {"type": "growth", "growth": "positive", "temperature": temp}
        for temp in ("20", "30")
    ]
    negative_growth = {"type": "growth", "growth": "negative", "temperature": "45"}  # ignored

    derived = _derive_optimum([*positive_growth, negative_growth], "temperature")

    # Median of 20 and 30.
    assert derived == 25.0
def test_extract_phenotypes_handles_missing_fields() -> None:
    """An empty record should yield ``None`` for the checked fields."""
    empty_result = extract_phenotypes({})

    for field in ("bacdive_id", "genome_accession", "optimal_temperature_c"):
        assert empty_result[field] is None
|