sml-agents-publish-subscribe-dbvec

Sleeping

App Files Files Community

sml-agents-publish-subscribe-dbvec / database.py

santanche

fix (dbvec): wrong internal import

e0d8955 about 2 months ago

raw

history blame contribute delete

2.16 kB

	import duckdb
	import pandas as pd
	from parser import parse_symptoms
	from embeddings import compute_weighted_embedding

	def init_db(csv_path="data/DerivedKnowledgeGraph_final.csv"):
	con = duckdb.connect("medical.db")

	# Sequences
	con.execute("CREATE SEQUENCE IF NOT EXISTS disease_seq START 1;")
	con.execute("CREATE SEQUENCE IF NOT EXISTS symptom_seq START 1;")

	# Tables
	con.execute("""
	CREATE TABLE IF NOT EXISTS disease (
	disease_id INTEGER PRIMARY KEY DEFAULT nextval('disease_seq'),
	name TEXT UNIQUE
	);
	""")

	con.execute("""
	CREATE TABLE IF NOT EXISTS symptom (
	symptom_id INTEGER PRIMARY KEY DEFAULT nextval('symptom_seq'),
	name TEXT UNIQUE
	);
	""")

	con.execute("""
	CREATE TABLE IF NOT EXISTS disease_symptom (
	disease_id INTEGER,
	symptom_id INTEGER,
	incidence FLOAT,
	PRIMARY KEY (disease_id, symptom_id)
	);
	""")

	con.execute("""
	CREATE TABLE IF NOT EXISTS disease_embedding (
	disease_id INTEGER,
	embedding DOUBLE[]
	);
	""")

	# Load CSV
	df = pd.read_csv(csv_path)

	for _, row in df.iterrows():
	disease = row.iloc[0].strip().lower()
	symptoms = parse_symptoms(row.iloc[1])

	con.execute("INSERT OR IGNORE INTO disease (name) VALUES (?)", [disease])
	disease_id = con.execute(
	"SELECT disease_id FROM disease WHERE name = ?",
	[disease]
	).fetchone()[0]

	for symptom, incidence in symptoms:
	con.execute("INSERT OR IGNORE INTO symptom (name) VALUES (?)", [symptom])
	symptom_id = con.execute(
	"SELECT symptom_id FROM symptom WHERE name = ?",
	[symptom]
	).fetchone()[0]

	con.execute("""
	INSERT OR IGNORE INTO disease_symptom
	VALUES (?, ?, ?)
	""", [disease_id, symptom_id, incidence])

	embedding = compute_weighted_embedding(symptoms)
	con.execute(
	"INSERT INTO disease_embedding VALUES (?, ?)",
	[disease_id, embedding.tolist()]
	)

	return con