Vertebro / spine_coder /tests /test_dataset_basic.py
Slaiwala's picture
Add spine_coder package (core + dataset + tests)
e1a9577
import json
from pathlib import Path
import pandas as pd
# Location of the dataset fixtures: the "synthetic_dataset" folder that sits
# next to the directory containing this file (i.e. one level above it).
DATA_DIR = Path(__file__).resolve().parents[1].joinpath("synthetic_dataset")
def test_files_exist():
    """Check that both the JSONL and the CSV dataset files exist in DATA_DIR."""
    jsonl_path = DATA_DIR / "notes_500.jsonl"
    csv_path = DATA_DIR / "notes_500.csv"
    assert jsonl_path.exists()
    assert csv_path.exists()
def test_counts_and_fields():
    """Validate row counts and required fields of both dataset files.

    The JSONL file must hold exactly 500 non-blank records, each containing
    every key in ``required_keys``. The CSV file must hold exactly 500 rows
    and expose every column in ``expected_columns``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (which is not UTF-8 everywhere).
    jsonl_text = (DATA_DIR / "notes_500.jsonl").read_text(encoding="utf-8")
    records = [
        json.loads(line) for line in jsonl_text.splitlines() if line.strip()
    ]
    assert len(records) == 500, f"expected 500 JSONL records, found {len(records)}"

    required_keys = {
        "id", "note", "region", "approach", "intent",
        "levels", "laterality", "flags", "labels",
    }
    for index, record in enumerate(records):
        missing = required_keys.difference(record.keys())
        assert not missing, (
            f"JSONL record {index} is missing keys: {sorted(missing)}"
        )

    table = pd.read_csv(DATA_DIR / "notes_500.csv")
    assert len(table) == 500, f"expected 500 CSV rows, found {len(table)}"

    # "levels" and "flags" are required in the JSONL records but are not
    # checked for the CSV.
    expected_columns = [
        "id", "note", "region", "approach", "intent", "laterality", "labels",
    ]
    for column in expected_columns:
        assert column in table.columns, f"CSV is missing column {column!r}"
# NOTE(review): as shown here this statement sits at module level, so it would
# run whenever the module is imported/collected rather than after the tests
# pass — confirm that this placement is intended.
print("✅ test_dataset_basic ready")