#!/usr/bin/env python3 """ Create a minimal test database for evaluation framework validation. This creates a small SQLite database with sample patient data that can be used to test the evaluation framework. """ import sqlite3 import os from datetime import datetime, timedelta import random DB_PATH = "data/fhir.db" def create_test_database(): """Create test database with sample data.""" os.makedirs(os.path.dirname(DB_PATH), exist_ok=True) conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() # Create tables cursor.executescript(""" -- Patients table CREATE TABLE IF NOT EXISTS patients ( id TEXT PRIMARY KEY, given_name TEXT, family_name TEXT, birth_date TEXT, gender TEXT, marital_status TEXT ); -- Conditions table CREATE TABLE IF NOT EXISTS conditions ( id TEXT PRIMARY KEY, patient_id TEXT, code TEXT, display TEXT, clinical_status TEXT, onset_date TEXT, abatement_date TEXT ); -- Medications table CREATE TABLE IF NOT EXISTS medications ( id TEXT PRIMARY KEY, patient_id TEXT, code TEXT, display TEXT, status TEXT, start_date TEXT ); -- Observations table CREATE TABLE IF NOT EXISTS observations ( id TEXT PRIMARY KEY, patient_id TEXT, code TEXT, display TEXT, value_quantity REAL, unit TEXT, effective_date TEXT, category TEXT ); -- Allergies table CREATE TABLE IF NOT EXISTS allergies ( id TEXT PRIMARY KEY, patient_id TEXT, substance TEXT, reaction_display TEXT, criticality TEXT, category TEXT ); -- Immunizations table CREATE TABLE IF NOT EXISTS immunizations ( id TEXT PRIMARY KEY, patient_id TEXT, vaccine_code TEXT, vaccine_display TEXT, status TEXT, occurrence_date TEXT ); -- Procedures table CREATE TABLE IF NOT EXISTS procedures ( id TEXT PRIMARY KEY, patient_id TEXT, code TEXT, display TEXT, status TEXT, performed_date TEXT ); -- Encounters table CREATE TABLE IF NOT EXISTS encounters ( id TEXT PRIMARY KEY, patient_id TEXT, status TEXT, class_code TEXT, class_display TEXT, type_code TEXT, type_display TEXT, reason_code TEXT, reason_display TEXT, period_start TEXT, period_end TEXT ); """) # Create test patients patients = [ ("patient-001", "John", "Smith", "1965-03-15", "male"), ("patient-002", "Mary", "Johnson", "1978-07-22", "female"), ("patient-003", "Robert", "Williams", "1952-11-08", "male"), ] for pid, given, family, dob, gender in patients: cursor.execute(""" INSERT OR REPLACE INTO patients (id, given_name, family_name, birth_date, gender) VALUES (?, ?, ?, ?, ?) """, (pid, given, family, dob, gender)) # Create conditions conditions = [ ("patient-001", "44054006", "Type 2 Diabetes Mellitus", "active", "2015-06-10"), ("patient-001", "38341003", "Essential Hypertension", "active", "2018-02-15"), ("patient-002", "195967001", "Asthma", "active", "2010-04-20"), ("patient-002", "73211009", "Type 2 Diabetes Mellitus", "active", "2020-01-10"), ("patient-003", "38341003", "Essential Hypertension", "active", "2005-08-12"), ("patient-003", "13644009", "Hypercholesterolemia", "active", "2010-03-25"), ] for i, (pid, code, display, status, onset) in enumerate(conditions): cursor.execute(""" INSERT OR REPLACE INTO conditions (id, patient_id, code, display, clinical_status, onset_date) VALUES (?, ?, ?, ?, ?, ?) """, (f"cond-{i+1:03d}", pid, code, display, status, onset)) # Create medications medications = [ ("patient-001", "860975", "Metformin 500 MG Oral Tablet", "active", "2015-06-15"), ("patient-001", "314076", "Lisinopril 10 MG Oral Tablet", "active", "2018-02-20"), ("patient-002", "895994", "Albuterol 90 MCG Inhaler", "active", "2010-05-01"), ("patient-002", "860975", "Metformin 500 MG Oral Tablet", "active", "2020-01-15"), ("patient-003", "314076", "Lisinopril 20 MG Oral Tablet", "active", "2005-08-20"), ("patient-003", "316672", "Atorvastatin 20 MG Oral Tablet", "active", "2010-04-01"), ] for i, (pid, code, display, status, start) in enumerate(medications): cursor.execute(""" INSERT OR REPLACE INTO medications (id, patient_id, code, display, status, start_date) VALUES (?, ?, ?, ?, ?, ?) """, (f"med-{i+1:03d}", pid, code, display, status, start)) # Create observations (vitals) base_date = datetime.now() for pid in ["patient-001", "patient-002", "patient-003"]: obs_id = 1 # Blood pressure readings over last 30 days for days_ago in range(0, 30, 5): date = (base_date - timedelta(days=days_ago)).strftime("%Y-%m-%d") systolic = random.randint(120, 145) diastolic = random.randint(75, 95) cursor.execute(""" INSERT OR REPLACE INTO observations (id, patient_id, code, display, value_quantity, unit, effective_date, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, (f"obs-{pid}-{obs_id}", pid, "8480-6", "Systolic Blood Pressure", systolic, "mmHg", date, "vital-signs")) obs_id += 1 cursor.execute(""" INSERT OR REPLACE INTO observations (id, patient_id, code, display, value_quantity, unit, effective_date, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, (f"obs-{pid}-{obs_id}", pid, "8462-4", "Diastolic Blood Pressure", diastolic, "mmHg", date, "vital-signs")) obs_id += 1 # Heart rate hr = random.randint(65, 85) cursor.execute(""" INSERT OR REPLACE INTO observations (id, patient_id, code, display, value_quantity, unit, effective_date, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, (f"obs-{pid}-{obs_id}", pid, "8867-4", "Heart Rate", hr, "/min", date, "vital-signs")) obs_id += 1 # A1c readings (quarterly) for months_ago in [0, 3, 6, 9]: date = (base_date - timedelta(days=months_ago*30)).strftime("%Y-%m-%d") a1c = round(random.uniform(6.0, 8.5), 1) cursor.execute(""" INSERT OR REPLACE INTO observations (id, patient_id, code, display, value_quantity, unit, effective_date, category) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, (f"obs-{pid}-{obs_id}", pid, "4548-4", "Hemoglobin A1c", a1c, "%", date, "laboratory")) obs_id += 1 # Create allergies allergies = [ ("patient-001", "Penicillin", "Hives", "high", "medication"), ("patient-002", "Peanuts", "Anaphylaxis", "high", "food"), ("patient-002", "Latex", "Rash", "low", "environment"), ("patient-003", "Sulfa drugs", "Rash", "moderate", "medication"), ] for i, (pid, substance, reaction, criticality, category) in enumerate(allergies): cursor.execute(""" INSERT OR REPLACE INTO allergies (id, patient_id, substance, reaction_display, criticality, category) VALUES (?, ?, ?, ?, ?, ?) """, (f"allergy-{i+1:03d}", pid, substance, reaction, criticality, category)) # Create immunizations immunizations = [ ("patient-001", "140", "Influenza Vaccine", "completed", "2024-10-15"), ("patient-001", "207", "COVID-19 Vaccine", "completed", "2024-01-20"), ("patient-002", "140", "Influenza Vaccine", "completed", "2024-11-01"), ("patient-002", "113", "Tdap Vaccine", "completed", "2022-05-10"), ("patient-003", "140", "Influenza Vaccine", "completed", "2024-09-20"), ("patient-003", "33", "Pneumococcal Vaccine", "completed", "2023-03-15"), ] for i, (pid, code, display, status, date) in enumerate(immunizations): cursor.execute(""" INSERT OR REPLACE INTO immunizations (id, patient_id, vaccine_code, vaccine_display, status, occurrence_date) VALUES (?, ?, ?, ?, ?, ?) """, (f"imm-{i+1:03d}", pid, code, display, status, date)) # Create procedures procedures = [ ("patient-001", "73761001", "Colonoscopy", "completed", "2023-06-15"), ("patient-002", "80146002", "Appendectomy", "completed", "2015-08-20"), ("patient-003", "232717009", "Coronary Angioplasty", "completed", "2020-02-10"), ("patient-003", "73761001", "Colonoscopy", "completed", "2022-04-05"), ] for i, (pid, code, display, status, date) in enumerate(procedures): cursor.execute(""" INSERT OR REPLACE INTO procedures (id, patient_id, code, display, status, performed_date) VALUES (?, ?, ?, ?, ?, ?) """, (f"proc-{i+1:03d}", pid, code, display, status, date)) # Create encounters for pid in ["patient-001", "patient-002", "patient-003"]: for i in range(5): days_ago = i * 60 # Every ~2 months start = (base_date - timedelta(days=days_ago)).strftime("%Y-%m-%d") end = start cursor.execute(""" INSERT OR REPLACE INTO encounters (id, patient_id, status, class_code, class_display, type_code, type_display, reason_code, reason_display, period_start, period_end) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, (f"enc-{pid}-{i+1}", pid, "finished", "AMB", "ambulatory", "185349003", "Office Visit", "185349003", "Routine checkup", start, end)) conn.commit() conn.close() print(f"Test database created at {DB_PATH}") print("Contains:") print(" - 3 patients") print(" - 6 conditions") print(" - 6 medications") print(" - ~90 observations (vitals + labs)") print(" - 4 allergies") print(" - 6 immunizations") print(" - 4 procedures") print(" - 15 encounters") if __name__ == "__main__": create_test_database()