| import json | |
| from pathlib import Path | |
| import pandas as pd | |
# Directory holding the dataset files checked by these tests: the
# "synthetic_dataset" folder located two levels up from this file's
# resolved path (i.e. next to the directory that contains this file).
DATA_DIR = Path(__file__).resolve().parents[1].joinpath("synthetic_dataset")
def test_files_exist():
    """Check that both dataset exports are present under ``DATA_DIR``."""
    # JSONL first, then CSV, so a missing file is reported in a stable order.
    for filename in ("notes_500.jsonl", "notes_500.csv"):
        assert (DATA_DIR / filename).exists()
def test_counts_and_fields():
    """Validate row counts and schema of the JSONL and CSV dataset exports.

    The JSONL file must contain exactly 500 non-blank lines, each parsing to
    a JSON object that has every key in ``required``. The CSV file must
    contain exactly 500 data rows and expose every expected column.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (which would otherwise be used by default).
    jsonl_text = (DATA_DIR / "notes_500.jsonl").read_text(encoding="utf-8")
    # Blank / whitespace-only lines are skipped rather than parsed.
    rows = [json.loads(line) for line in jsonl_text.splitlines() if line.strip()]
    assert len(rows) == 500

    required = {"id","note","region","approach","intent","levels","laterality","flags","labels"}
    for row in rows:
        assert required.issubset(row.keys())

    df = pd.read_csv(DATA_DIR / "notes_500.csv")
    assert len(df) == 500
    # Only this subset of columns is checked for the CSV; "levels" and
    # "flags" are verified for the JSONL rows above but not here.
    for col in ["id","note","region","approach","intent","laterality","labels"]:
        assert col in df.columns
# Status message announcing that this test module is in place.
# NOTE(review): the original indentation was lost, so it is assumed this
# statement sits at module level (runs on import) — confirm against the
# original file before relying on that.
print("✅ test_dataset_basic ready")