"""Basic sanity checks for the synthetic notes dataset.

Verifies that the JSONL and CSV exports (``notes_500.jsonl`` and
``notes_500.csv``) exist under ``DATA_DIR``, that each holds 500 records,
and that the expected fields/columns are present.
"""

import json
from pathlib import Path

import pandas as pd

# ``parents[1]`` is the directory one level above the one containing this
# file; the dataset is looked up in its ``synthetic_dataset`` subdirectory.
DATA_DIR = Path(__file__).resolve().parents[1] / "synthetic_dataset"


def test_files_exist():
    """Both dataset exports must be present in ``DATA_DIR``."""
    for filename in ("notes_500.jsonl", "notes_500.csv"):
        path = DATA_DIR / filename
        assert path.exists(), f"missing dataset file: {path}"


def test_counts_and_fields():
    """Check record counts and required fields of the JSONL and CSV exports."""
    jsonl_path = DATA_DIR / "notes_500.jsonl"

    # Iterate the file handle instead of ``read_text().splitlines()``:
    # ``str.splitlines`` also breaks on characters such as U+2028/U+2029 that
    # may appear unescaped inside JSON strings, which would cut a record in
    # half. The explicit encoding keeps decoding independent of the locale.
    with jsonl_path.open(encoding="utf-8") as handle:
        rows = [json.loads(line) for line in handle if line.strip()]

    assert len(rows) == 500, f"expected 500 JSONL rows, found {len(rows)}"

    required = {
        "id",
        "note",
        "region",
        "approach",
        "intent",
        "levels",
        "laterality",
        "flags",
        "labels",
    }
    for index, row in enumerate(rows):
        missing = required.difference(row.keys())
        assert not missing, f"JSONL row {index} is missing keys: {sorted(missing)}"

    df = pd.read_csv(DATA_DIR / "notes_500.csv")
    assert len(df) == 500, f"expected 500 CSV rows, found {len(df)}"

    expected_columns = [
        "id",
        "note",
        "region",
        "approach",
        "intent",
        "laterality",
        "labels",
    ]
    absent = [col for col in expected_columns if col not in df.columns]
    assert not absent, f"CSV is missing columns: {absent}"


print("✅ test_dataset_basic ready")