File size: 1,339 Bytes
0ed74db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from __future__ import annotations

import importlib.util
from pathlib import Path

import pandas as pd


def load_script_module():
    """Load ``scripts/42_prepare_external_benchmarks.py`` as a module object.

    The script's file name begins with a digit, so it is not a valid
    identifier for a plain ``import`` statement; it is loaded from its file
    path instead. The path is built from the parent of the directory that
    contains this file, followed by ``scripts/``.

    The module is executed on every call and is not inserted into
    ``sys.modules``, so each caller receives its own module object.

    Returns:
        The executed module, created under the name
        ``external_benchmark_prep``.
    """
    base_dir = Path(__file__).resolve().parents[1]
    script_path = base_dir / "scripts" / "42_prepare_external_benchmarks.py"

    module_spec = importlib.util.spec_from_file_location(
        "external_benchmark_prep", script_path
    )
    assert module_spec is not None
    loader = module_spec.loader
    assert loader is not None

    loaded = importlib.util.module_from_spec(module_spec)
    loader.exec_module(loaded)
    return loaded


def test_local_fasta_path_accepts_common_suffixes(tmp_path: Path) -> None:
    """Check ``local_fasta_path`` for a present and an absent accession.

    A file named ``<accession>.fna.gz`` in the search directory must be
    returned for that accession, and an accession with no file must give
    ``None``.
    """
    prep = load_script_module()

    # Only the file name matters for this test; the content is plain text.
    expected_path = tmp_path / "GCF_000001.fna.gz"
    expected_path.write_text(">contig\nATGC\n")

    found = prep.local_fasta_path(tmp_path, "GCF_000001")
    assert found == expected_path

    absent = prep.local_fasta_path(tmp_path, "GCF_000002")
    assert absent is None


def test_fasta_coverage_counts_unique_accessions(tmp_path: Path) -> None:
    """Check that ``fasta_coverage`` counts each accession once.

    The manifest lists ``GCF_000001`` twice and ``GCF_000002`` once, while
    only ``GCF_000001`` has a FASTA file on disk. The expected summary is
    two unique accessions, one present, one missing, and 50 % coverage.
    """
    prep = load_script_module()

    present_fasta = tmp_path / "GCF_000001.fna"
    present_fasta.write_text(">contig\nATGC\n")

    # The duplicated accession must not be counted twice.
    accessions = ["GCF_000001", "GCF_000001", "GCF_000002"]
    manifest = pd.DataFrame({"genome_accession": accessions})

    coverage = prep.fasta_coverage(manifest, tmp_path)

    expectations = (
        ("unique_accessions", 2),
        ("present_fastas", 1),
        ("missing_fastas", 1),
        ("coverage_pct", 50.0),
    )
    for key, expected in expectations:
        assert coverage[key] == expected