Spaces:
Running
Running
| """Test BacDive phenotype extraction against a fixture of the real v2 schema.""" | |
| from __future__ import annotations | |
| from microbe_model.data.bacdive import _derive_optimum, extract_phenotypes | |
# Trimmed-down version of a real /v2/fetch/24493 response (Phaeobacter gallaeciensis BS 107).
# Shared fixture: test_extract_phenotypes_real_schema feeds it to extract_phenotypes().
SAMPLE_RECORD = {
    # Record identity. The test expects "BacDive-ID" to surface as out["bacdive_id"].
    "General": {
        "BacDive-ID": 24493,
        # Two taxon ids at different matching levels; the test expects the
        # strain-level id (1423144), which is also the first entry.
        # NOTE(review): whether extraction selects by position or by
        # "Matching level" is not visible from this file - confirm in bacdive.py.
        "NCBI tax id": [
            {"NCBI tax id": 1423144, "Matching level": "strain"},
            {"NCBI tax id": 60890, "Matching level": "species"},
        ],
    },
    "Name and taxonomic classification": {
        # Full lineage. "family" appears only here, and the test asserts it.
        "LPSN": {
            "domain": "Bacteria",
            "phylum": "Pseudomonadota",
            "class": "Alphaproteobacteria",
            "order": "Rhodobacterales",
            "family": "Roseobacteraceae",
            "genus": "Phaeobacter",
            "species": "Phaeobacter gallaeciensis",
        },
        # Genus and species are repeated at section level with the same values as under "LPSN".
        "genus": "Phaeobacter",
        "species": "Phaeobacter gallaeciensis",
    },
    "Culture and growth conditions": {
        # Temperatures are strings; one entry is a range ("5-30") and one is a
        # negative-growth observation. There is no entry of type "optimum", and
        # the test expects 22.0 (median of 25, 22 and the 5-30 midpoint 17.5).
        "culture temp": [
            {"growth": "positive", "type": "growth", "temperature": "25"},
            {"growth": "positive", "type": "growth", "temperature": "22"},
            {"growth": "positive", "type": "growth", "temperature": "5-30"},
            {"growth": "negative", "type": "growth", "temperature": "37"},
        ],
    },
    "Physiology and metabolism": {
        # Single-entry list; the test expects "obligate aerobe" as out["oxygen_requirement"].
        "oxygen tolerance": [{"oxygen tolerance": "obligate aerobe"}],
    },
    "Sequence information": {
        # Two assemblies; the test expects the first listed accession to be returned.
        "Genome sequences": [
            {"INSDC accession": "GCA_000511385", "assembly level": "complete"},
            {"INSDC accession": "GCA_000819625", "assembly level": "contig"},
        ],
    },
}
def test_extract_phenotypes_real_schema() -> None:
    """Check each field extracted from the real-schema fixture ``SAMPLE_RECORD``."""
    phenotypes = extract_phenotypes(SAMPLE_RECORD)

    # Expected output, listed in the order the fields are checked.
    expected = {
        "bacdive_id": 24493,
        "species": "Phaeobacter gallaeciensis",
        "genus": "Phaeobacter",
        "family": "Roseobacteraceae",
        "ncbi_taxon_id": 1423144,
        "genome_accession": "GCA_000511385",  # first listed
        "oxygen_requirement": "obligate aerobe",
        # Three positive-growth temps: 25, 22, midpoint(5-30)=17.5, so the median is 22.
        "optimal_temperature_c": 22.0,
    }

    for field, value in expected.items():
        assert phenotypes[field] == value
def test_derive_optimum_prefers_explicit_optimum() -> None:
    """An entry typed ``"optimum"`` must win over the positive-growth entries around it."""
    growth_at_30 = {"type": "growth", "growth": "positive", "temperature": "30"}
    explicit_optimum = {"type": "optimum", "temperature": "37"}
    growth_at_25 = {"type": "growth", "growth": "positive", "temperature": "25"}

    # The optimum entry sits between the growth entries, not first or last.
    derived = _derive_optimum(
        [growth_at_30, explicit_optimum, growth_at_25],
        "temperature",
    )

    assert derived == 37.0
def test_derive_optimum_falls_back_to_growth_median() -> None:
    """With no ``"optimum"`` entry, expect the median of the positive-growth values."""
    positive_growth = [
        {"type": "growth", "growth": "positive", "temperature": value}
        for value in ("20", "30")
    ]
    negative_growth = {"type": "growth", "growth": "negative", "temperature": "45"}  # ignored

    derived = _derive_optimum([*positive_growth, negative_growth], "temperature")

    # Median of 20 and 30; the negative-growth 45 must not contribute.
    assert derived == 25.0
def test_extract_phenotypes_handles_missing_fields() -> None:
    """An empty record must yield ``None`` for the checked fields."""
    phenotypes = extract_phenotypes({})

    for field in ("bacdive_id", "genome_accession", "optimal_temperature_c"):
        assert phenotypes[field] is None