multimodal_previsit / evaluation /create_test_db.py
frabbani
Fix fact extraction - pass raw data for simple tools.......
8daa8bf
#!/usr/bin/env python3
"""
Create a minimal test database for evaluation framework validation.
This creates a small SQLite database with sample patient data
that can be used to test the evaluation framework.
"""
import sqlite3
import os
from datetime import datetime, timedelta
import random
DB_PATH = "data/fhir.db"
def create_test_database():
"""Create test database with sample data."""
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# Create tables
cursor.executescript("""
-- Patients table
CREATE TABLE IF NOT EXISTS patients (
id TEXT PRIMARY KEY,
given_name TEXT,
family_name TEXT,
birth_date TEXT,
gender TEXT,
marital_status TEXT
);
-- Conditions table
CREATE TABLE IF NOT EXISTS conditions (
id TEXT PRIMARY KEY,
patient_id TEXT,
code TEXT,
display TEXT,
clinical_status TEXT,
onset_date TEXT,
abatement_date TEXT
);
-- Medications table
CREATE TABLE IF NOT EXISTS medications (
id TEXT PRIMARY KEY,
patient_id TEXT,
code TEXT,
display TEXT,
status TEXT,
start_date TEXT
);
-- Observations table
CREATE TABLE IF NOT EXISTS observations (
id TEXT PRIMARY KEY,
patient_id TEXT,
code TEXT,
display TEXT,
value_quantity REAL,
unit TEXT,
effective_date TEXT,
category TEXT
);
-- Allergies table
CREATE TABLE IF NOT EXISTS allergies (
id TEXT PRIMARY KEY,
patient_id TEXT,
substance TEXT,
reaction_display TEXT,
criticality TEXT,
category TEXT
);
-- Immunizations table
CREATE TABLE IF NOT EXISTS immunizations (
id TEXT PRIMARY KEY,
patient_id TEXT,
vaccine_code TEXT,
vaccine_display TEXT,
status TEXT,
occurrence_date TEXT
);
-- Procedures table
CREATE TABLE IF NOT EXISTS procedures (
id TEXT PRIMARY KEY,
patient_id TEXT,
code TEXT,
display TEXT,
status TEXT,
performed_date TEXT
);
-- Encounters table
CREATE TABLE IF NOT EXISTS encounters (
id TEXT PRIMARY KEY,
patient_id TEXT,
status TEXT,
class_code TEXT,
class_display TEXT,
type_code TEXT,
type_display TEXT,
reason_code TEXT,
reason_display TEXT,
period_start TEXT,
period_end TEXT
);
""")
# Create test patients
patients = [
("patient-001", "John", "Smith", "1965-03-15", "male"),
("patient-002", "Mary", "Johnson", "1978-07-22", "female"),
("patient-003", "Robert", "Williams", "1952-11-08", "male"),
]
for pid, given, family, dob, gender in patients:
cursor.execute("""
INSERT OR REPLACE INTO patients (id, given_name, family_name, birth_date, gender)
VALUES (?, ?, ?, ?, ?)
""", (pid, given, family, dob, gender))
# Create conditions
conditions = [
("patient-001", "44054006", "Type 2 Diabetes Mellitus", "active", "2015-06-10"),
("patient-001", "38341003", "Essential Hypertension", "active", "2018-02-15"),
("patient-002", "195967001", "Asthma", "active", "2010-04-20"),
("patient-002", "73211009", "Type 2 Diabetes Mellitus", "active", "2020-01-10"),
("patient-003", "38341003", "Essential Hypertension", "active", "2005-08-12"),
("patient-003", "13644009", "Hypercholesterolemia", "active", "2010-03-25"),
]
for i, (pid, code, display, status, onset) in enumerate(conditions):
cursor.execute("""
INSERT OR REPLACE INTO conditions (id, patient_id, code, display, clinical_status, onset_date)
VALUES (?, ?, ?, ?, ?, ?)
""", (f"cond-{i+1:03d}", pid, code, display, status, onset))
# Create medications
medications = [
("patient-001", "860975", "Metformin 500 MG Oral Tablet", "active", "2015-06-15"),
("patient-001", "314076", "Lisinopril 10 MG Oral Tablet", "active", "2018-02-20"),
("patient-002", "895994", "Albuterol 90 MCG Inhaler", "active", "2010-05-01"),
("patient-002", "860975", "Metformin 500 MG Oral Tablet", "active", "2020-01-15"),
("patient-003", "314076", "Lisinopril 20 MG Oral Tablet", "active", "2005-08-20"),
("patient-003", "316672", "Atorvastatin 20 MG Oral Tablet", "active", "2010-04-01"),
]
for i, (pid, code, display, status, start) in enumerate(medications):
cursor.execute("""
INSERT OR REPLACE INTO medications (id, patient_id, code, display, status, start_date)
VALUES (?, ?, ?, ?, ?, ?)
""", (f"med-{i+1:03d}", pid, code, display, status, start))
# Create observations (vitals)
base_date = datetime.now()
for pid in ["patient-001", "patient-002", "patient-003"]:
obs_id = 1
# Blood pressure readings over last 30 days
for days_ago in range(0, 30, 5):
date = (base_date - timedelta(days=days_ago)).strftime("%Y-%m-%d")
systolic = random.randint(120, 145)
diastolic = random.randint(75, 95)
cursor.execute("""
INSERT OR REPLACE INTO observations
(id, patient_id, code, display, value_quantity, unit, effective_date, category)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (f"obs-{pid}-{obs_id}", pid, "8480-6", "Systolic Blood Pressure",
systolic, "mmHg", date, "vital-signs"))
obs_id += 1
cursor.execute("""
INSERT OR REPLACE INTO observations
(id, patient_id, code, display, value_quantity, unit, effective_date, category)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (f"obs-{pid}-{obs_id}", pid, "8462-4", "Diastolic Blood Pressure",
diastolic, "mmHg", date, "vital-signs"))
obs_id += 1
# Heart rate
hr = random.randint(65, 85)
cursor.execute("""
INSERT OR REPLACE INTO observations
(id, patient_id, code, display, value_quantity, unit, effective_date, category)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (f"obs-{pid}-{obs_id}", pid, "8867-4", "Heart Rate",
hr, "/min", date, "vital-signs"))
obs_id += 1
# A1c readings (quarterly)
for months_ago in [0, 3, 6, 9]:
date = (base_date - timedelta(days=months_ago*30)).strftime("%Y-%m-%d")
a1c = round(random.uniform(6.0, 8.5), 1)
cursor.execute("""
INSERT OR REPLACE INTO observations
(id, patient_id, code, display, value_quantity, unit, effective_date, category)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (f"obs-{pid}-{obs_id}", pid, "4548-4", "Hemoglobin A1c",
a1c, "%", date, "laboratory"))
obs_id += 1
# Create allergies
allergies = [
("patient-001", "Penicillin", "Hives", "high", "medication"),
("patient-002", "Peanuts", "Anaphylaxis", "high", "food"),
("patient-002", "Latex", "Rash", "low", "environment"),
("patient-003", "Sulfa drugs", "Rash", "moderate", "medication"),
]
for i, (pid, substance, reaction, criticality, category) in enumerate(allergies):
cursor.execute("""
INSERT OR REPLACE INTO allergies
(id, patient_id, substance, reaction_display, criticality, category)
VALUES (?, ?, ?, ?, ?, ?)
""", (f"allergy-{i+1:03d}", pid, substance, reaction, criticality, category))
# Create immunizations
immunizations = [
("patient-001", "140", "Influenza Vaccine", "completed", "2024-10-15"),
("patient-001", "207", "COVID-19 Vaccine", "completed", "2024-01-20"),
("patient-002", "140", "Influenza Vaccine", "completed", "2024-11-01"),
("patient-002", "113", "Tdap Vaccine", "completed", "2022-05-10"),
("patient-003", "140", "Influenza Vaccine", "completed", "2024-09-20"),
("patient-003", "33", "Pneumococcal Vaccine", "completed", "2023-03-15"),
]
for i, (pid, code, display, status, date) in enumerate(immunizations):
cursor.execute("""
INSERT OR REPLACE INTO immunizations
(id, patient_id, vaccine_code, vaccine_display, status, occurrence_date)
VALUES (?, ?, ?, ?, ?, ?)
""", (f"imm-{i+1:03d}", pid, code, display, status, date))
# Create procedures
procedures = [
("patient-001", "73761001", "Colonoscopy", "completed", "2023-06-15"),
("patient-002", "80146002", "Appendectomy", "completed", "2015-08-20"),
("patient-003", "232717009", "Coronary Angioplasty", "completed", "2020-02-10"),
("patient-003", "73761001", "Colonoscopy", "completed", "2022-04-05"),
]
for i, (pid, code, display, status, date) in enumerate(procedures):
cursor.execute("""
INSERT OR REPLACE INTO procedures
(id, patient_id, code, display, status, performed_date)
VALUES (?, ?, ?, ?, ?, ?)
""", (f"proc-{i+1:03d}", pid, code, display, status, date))
# Create encounters
for pid in ["patient-001", "patient-002", "patient-003"]:
for i in range(5):
days_ago = i * 60 # Every ~2 months
start = (base_date - timedelta(days=days_ago)).strftime("%Y-%m-%d")
end = start
cursor.execute("""
INSERT OR REPLACE INTO encounters
(id, patient_id, status, class_code, class_display, type_code, type_display,
reason_code, reason_display, period_start, period_end)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (f"enc-{pid}-{i+1}", pid, "finished", "AMB", "ambulatory",
"185349003", "Office Visit", "185349003", "Routine checkup",
start, end))
conn.commit()
conn.close()
print(f"Test database created at {DB_PATH}")
print("Contains:")
print(" - 3 patients")
print(" - 6 conditions")
print(" - 6 medications")
print(" - ~90 observations (vitals + labs)")
print(" - 4 allergies")
print(" - 6 immunizations")
print(" - 4 procedures")
print(" - 15 encounters")
if __name__ == "__main__":
create_test_database()