File size: 804 Bytes
e1a9577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import json
from pathlib import Path
import pandas as pd

# Directory one level above the folder that contains this file.
_BASE_DIR = Path(__file__).resolve().parents[1]
# Dataset files read by the tests below live in this sub-directory.
DATA_DIR = _BASE_DIR / "synthetic_dataset"

def test_files_exist():
    """Check that both dataset exports are present under ``DATA_DIR``."""
    # The JSONL export is checked first, then the CSV export.
    for filename in ("notes_500.jsonl", "notes_500.csv"):
        dataset_path = DATA_DIR / filename
        assert dataset_path.exists()

def test_counts_and_fields():
    """Validate row counts and required fields of both dataset exports.

    The JSONL export must contain exactly 500 non-blank records, each carrying
    every key in ``required``. The CSV export must contain exactly 500 data
    rows and expose the listed columns.
    """
    jsonl_path = DATA_DIR / "notes_500.jsonl"
    # Decode explicitly as UTF-8 rather than the platform locale, and iterate
    # the file so records are split only on real newlines: str.splitlines()
    # also breaks on characters such as U+2028, which may appear unescaped
    # inside a JSON string and would cut a record in half.
    with jsonl_path.open(encoding="utf-8") as fh:
        rows = [json.loads(line) for line in fh if line.strip()]
    assert len(rows) == 500, f"expected 500 JSONL records, found {len(rows)}"

    required = {"id","note","region","approach","intent","levels","laterality","flags","labels"}
    # Index is 0-based and counts non-blank records only.
    for index, record in enumerate(rows):
        missing = required.difference(record.keys())
        assert not missing, f"JSONL record {index} is missing keys: {sorted(missing)}"

    df = pd.read_csv(DATA_DIR / "notes_500.csv")
    assert len(df) == 500, f"expected 500 CSV rows, found {len(df)}"
    # Only this subset is checked for the CSV; "levels" and "flags" are
    # verified for the JSONL export only.
    for col in ["id","note","region","approach","intent","laterality","labels"]:
        assert col in df.columns, f"CSV is missing column {col!r}"

# Runs at module top level, so the message is emitted every time this module
# is imported (including during test collection), not when a test passes.
# NOTE(review): presumably a leftover confirmation from generating this file —
# confirm before removing.
print("✅ test_dataset_basic ready")