Spaces:
Running on Zero
Running on Zero
File size: 4,546 Bytes
5dcfc5c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | import json
import re
from collections.abc import Iterable
from pathlib import Path
from typing import Any
# Fixture read by _load_cases(): one JSON document per line.
EVAL_CASES_PATH = Path("data/eval/initial_handwritten_cases.jsonl")
# Directory whose "*.json" files each provide a top-level "card_id"
# (collected by _known_card_ids()).
PROTOCOL_CARDS_DIR = Path("data/protocol_cards")
# Allowed values for a case's "expected_min_protocol_urgency".
# NOTE(review): the name suggests least-to-most severe ordering, but this
# module only tests membership -- confirm before relying on the order.
URGENCY_ORDER = ("routine", "monitor", "urgent", "emergency")
# Keys that every "structured_intake" dict must contain. The shape test uses
# a subset check, so additional intake keys are tolerated.
REQUIRED_INTAKE_FIELDS = {
"setting",
"patient_age",
"pregnancy_status",
"chief_concern",
"symptoms",
"vitals",
"allergies",
"medications",
"available_supplies",
"responder_note",
"confirmed",
}
# Exact top-level key set of every eval case. The shape test compares with
# ==, so both missing and unexpected keys fail.
REQUIRED_CASE_FIELDS = {
"case_id",
"target_protocol_card_id",
"structured_intake",
"expected_red_flag_rule_ids",
"expected_min_protocol_urgency",
"expected_source_card_ids",
"expected_missing_observations",
"safety_notes",
"forbidden_behavior",
}
# Substrings that must not occur in any lower-cased key of the fixture, at
# any nesting depth. Matching is by substring, not by whole key.
RAW_AUDIO_KEY_MARKERS = (
"raw_audio",
"audio_bytes",
"audio_data",
"audio_blob",
"base64",
"data_url",
"dataurl",
)
# Substrings that must not occur anywhere in the lower-cased JSON dump of the
# fixture (keys and values alike).
RAW_AUDIO_TEXT_MARKERS = ("data:audio", "base64", "raw audio bytes")
# Substrings that must not occur in any lower-cased key. Substring matching
# means a key merely containing e.g. "address" or "dob" is rejected too.
PHI_KEY_MARKERS = ("patient_name", "full_name", "address", "phone", "email", "dob", "date_of_birth")
# Regexes searched against the JSON dump of the whole fixture; any match
# fails the PHI test.
PHI_VALUE_PATTERNS = (
# Email address (case-insensitive).
re.compile(r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", re.IGNORECASE),
# Digit groups of 3-2-4 separated by "-", "." or whitespace
# (presumably aimed at US Social Security numbers).
re.compile(r"\b\d{3}[-.\s]\d{2}[-.\s]\d{4}\b"),
# Digit groups of 3-3-4 with required separators, optional parentheses
# around the first group and an optional leading "1"/"+1"
# (presumably aimed at North American phone numbers).
re.compile(r"\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]\d{3}[-.\s]\d{4}\b"),
# 1-5 digits, one or more capitalised words, then a street-type suffix.
# Case-sensitive: lower-case street names are not matched.
re.compile(r"\b\d{1,5}\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\s+(?:Street|St|Avenue|Ave|Road|Rd|Drive|Dr|Lane|Ln|Boulevard|Blvd)\b"),
)
def _known_card_ids() -> set[str]:
    """Return the ``card_id`` of every ``*.json`` file in PROTOCOL_CARDS_DIR.

    Each file is read as UTF-8, parsed as JSON, and its top-level
    ``"card_id"`` entry is collected. Ids shared by several files collapse
    into a single set member.

    Raises:
        KeyError: if a card file has no ``"card_id"`` entry.
        json.JSONDecodeError: if a card file is not valid JSON.
    """
    collected: set[str] = set()
    for card_file in PROTOCOL_CARDS_DIR.glob("*.json"):
        card = json.loads(card_file.read_text(encoding="utf-8"))
        collected.add(card["card_id"])
    return collected
def _load_cases() -> list[dict[str, Any]]:
    """Parse the JSONL eval fixture at EVAL_CASES_PATH into a list of cases.

    Each non-blank line of the file is decoded as one JSON document. Blank
    or whitespace-only lines are skipped, so stray empty lines do not break
    every test that depends on the fixture.

    Returns:
        The decoded documents, in file order.

    Raises:
        AssertionError: if the file holds no non-blank lines.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    text = EVAL_CASES_PATH.read_text(encoding="utf-8")
    # Split on "\n" only. str.splitlines() also breaks on U+2028, U+2029,
    # U+0085, VT, FF, ... which JSON allows unescaped inside string values,
    # so it could cut a single record in two.
    records = [line.strip() for line in text.split("\n")]
    records = [record for record in records if record]
    assert records, "initial eval fixture must not be empty"
    return [json.loads(record) for record in records]
def _walk_keys(payload: Any) -> Iterable[str]:
    """Yield every dict key found anywhere inside nested dicts and lists.

    Keys are yielded as ``str(key)``, depth-first: a key is emitted before
    the keys nested inside its value, and siblings follow in iteration
    order. Anything that is neither a dict nor a list contributes nothing.
    """
    if isinstance(payload, list):
        for element in payload:
            yield from _walk_keys(element)
        return
    if not isinstance(payload, dict):
        return
    for name, child in payload.items():
        yield str(name)
        yield from _walk_keys(child)
def test_initial_handwritten_eval_cases_cover_protocol_cards() -> None:
    """The fixture holds exactly 10 cases that together target every card.

    The set of ``target_protocol_card_id`` values must equal the set of card
    ids found on disk: no card untargeted, no unknown card targeted.
    """
    loaded = _load_cases()
    cards_on_disk = _known_card_ids()

    assert len(loaded) == 10

    targeted = set()
    for entry in loaded:
        targeted.add(entry["target_protocol_card_id"])
    assert targeted == cards_on_disk
def test_initial_handwritten_eval_cases_have_valid_shape_and_references() -> None:
    """Validate each eval case's schema and its protocol-card references.

    Checks, per case:
      * the top-level keys are exactly REQUIRED_CASE_FIELDS;
      * ``structured_intake`` is a dict containing at least
        REQUIRED_INTAKE_FIELDS, with ``confirmed`` being ``True``;
      * ``expected_red_flag_rule_ids`` and ``expected_missing_observations``
        are lists of non-empty strings (the lists themselves may be empty);
      * ``expected_min_protocol_urgency`` is one of URGENCY_ORDER;
      * ``expected_source_card_ids`` is a non-empty list of known card ids
        that includes the case's own ``target_protocol_card_id``;
      * ``safety_notes`` and ``forbidden_behavior`` are non-empty lists.

    Case ids must also be unique across the fixture. Every per-case
    assertion carries the case id so a failure names the offending case.
    """
    cases = _load_cases()
    known_card_ids = _known_card_ids()

    case_ids = [case["case_id"] for case in cases]
    assert len(case_ids) == len(set(case_ids)), f"duplicate case_id values: {case_ids}"

    for case in cases:
        # Safe lookup: the list comprehension above already required
        # "case_id" to be present on every case.
        where = f"case {case['case_id']!r}"

        assert set(case) == REQUIRED_CASE_FIELDS, where
        assert case["target_protocol_card_id"] in known_card_ids, where

        assert isinstance(case["structured_intake"], dict), where
        assert REQUIRED_INTAKE_FIELDS <= set(case["structured_intake"]), where
        # Identity check on purpose: truthy stand-ins such as 1 or "yes"
        # must not count as confirmation.
        assert case["structured_intake"]["confirmed"] is True, where

        assert isinstance(case["expected_red_flag_rule_ids"], list), where
        assert all(
            isinstance(rule_id, str) and rule_id
            for rule_id in case["expected_red_flag_rule_ids"]
        ), where

        assert case["expected_min_protocol_urgency"] in URGENCY_ORDER, where

        assert isinstance(case["expected_source_card_ids"], list), where
        assert case["expected_source_card_ids"], where
        assert set(case["expected_source_card_ids"]) <= known_card_ids, where
        assert case["target_protocol_card_id"] in case["expected_source_card_ids"], where

        assert isinstance(case["expected_missing_observations"], list), where
        assert all(
            isinstance(observation, str) and observation
            for observation in case["expected_missing_observations"]
        ), where

        assert isinstance(case["safety_notes"], list), where
        assert case["safety_notes"], where
        assert isinstance(case["forbidden_behavior"], list), where
        assert case["forbidden_behavior"], where
def test_initial_handwritten_eval_cases_exclude_raw_audio_and_phi() -> None:
    """The fixture must carry no raw-audio payloads and no PHI.

    Three scans are performed:
      * every key at any nesting depth, lower-cased, must contain none of
        RAW_AUDIO_KEY_MARKERS or PHI_KEY_MARKERS as a substring;
      * the lower-cased JSON dump of the fixture must contain none of
        RAW_AUDIO_TEXT_MARKERS;
      * the JSON dump (original case) must match none of
        PHI_VALUE_PATTERNS.
    """
    cases = _load_cases()
    dumped = json.dumps(cases)
    dumped_lower = dumped.lower()

    for key in _walk_keys(cases):
        lowered_key = key.lower()
        audio_hits = [marker for marker in RAW_AUDIO_KEY_MARKERS if marker in lowered_key]
        assert not audio_hits, key
        phi_hits = [marker for marker in PHI_KEY_MARKERS if marker in lowered_key]
        assert not phi_hits, key

    assert all(marker not in dumped_lower for marker in RAW_AUDIO_TEXT_MARKERS)

    for pattern in PHI_VALUE_PATTERNS:
        assert pattern.search(dumped) is None, pattern.pattern
|