| | import duckdb |
| | import pandas as pd |
| | from parser import parse_symptoms |
| | from embeddings import compute_weighted_embedding |
| |
|
def init_db(csv_path="data/DerivedKnowledgeGraph_final.csv"):
    """Create the schema in ``medical.db`` and load the disease/symptom CSV.

    The first CSV column is used as the disease name (stripped and
    lower-cased); the second column is handed to ``parse_symptoms``, whose
    result is iterated as ``(symptom, incidence)`` pairs.  For every row the
    disease, its symptoms and the disease-symptom links are inserted with
    ``INSERT OR IGNORE``, and one embedding computed by
    ``compute_weighted_embedding`` is stored for the disease.

    The function is safe to call repeatedly against the same database file:
    existing diseases, symptoms and links are left alone, and the stored
    embedding of a disease is replaced rather than duplicated.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        The open DuckDB connection to ``medical.db``.  The caller is
        responsible for closing it.

    Raises:
        Any exception raised while reading the CSV, parsing a row or running
        SQL is propagated after the connection has been closed.
    """
    con = duckdb.connect("medical.db")
    try:
        _create_schema(con)

        df = pd.read_csv(csv_path)

        for _, row in df.iterrows():
            disease = row.iloc[0].strip().lower()
            symptoms = parse_symptoms(row.iloc[1])

            # Insert-then-select yields the id whether the row is new or
            # already present.
            con.execute(
                "INSERT OR IGNORE INTO disease (name) VALUES (?)", [disease]
            )
            disease_id = con.execute(
                "SELECT disease_id FROM disease WHERE name = ?",
                [disease],
            ).fetchone()[0]

            for symptom, incidence in symptoms:
                con.execute(
                    "INSERT OR IGNORE INTO symptom (name) VALUES (?)", [symptom]
                )
                symptom_id = con.execute(
                    "SELECT symptom_id FROM symptom WHERE name = ?",
                    [symptom],
                ).fetchone()[0]

                con.execute("""
                    INSERT OR IGNORE INTO disease_symptom
                    VALUES (?, ?, ?)
                """, [disease_id, symptom_id, incidence])

            # Compute first, so a failure here leaves any previously stored
            # embedding untouched.
            embedding = compute_weighted_embedding(symptoms)

            # disease_embedding has no key, so a plain INSERT would add one
            # more row per disease on every run (and for every repeated
            # disease name in the CSV).  Drop the stale row(s) before
            # storing the fresh embedding.
            con.execute(
                "DELETE FROM disease_embedding WHERE disease_id = ?",
                [disease_id],
            )
            con.execute(
                "INSERT INTO disease_embedding VALUES (?, ?)",
                [disease_id, embedding.tolist()],
            )
    except BaseException:
        # Do not leak the connection when loading fails; the error itself
        # is re-raised unchanged.
        con.close()
        raise

    return con


def _create_schema(con):
    """Create the sequences and tables used by ``init_db`` if they are missing."""
    con.execute("CREATE SEQUENCE IF NOT EXISTS disease_seq START 1;")
    con.execute("CREATE SEQUENCE IF NOT EXISTS symptom_seq START 1;")

    con.execute("""
        CREATE TABLE IF NOT EXISTS disease (
            disease_id INTEGER PRIMARY KEY DEFAULT nextval('disease_seq'),
            name TEXT UNIQUE
        );
    """)

    con.execute("""
        CREATE TABLE IF NOT EXISTS symptom (
            symptom_id INTEGER PRIMARY KEY DEFAULT nextval('symptom_seq'),
            name TEXT UNIQUE
        );
    """)

    con.execute("""
        CREATE TABLE IF NOT EXISTS disease_symptom (
            disease_id INTEGER,
            symptom_id INTEGER,
            incidence FLOAT,
            PRIMARY KEY (disease_id, symptom_id)
        );
    """)

    con.execute("""
        CREATE TABLE IF NOT EXISTS disease_embedding (
            disease_id INTEGER,
            embedding DOUBLE[]
        );
    """)
| |
|