santanche's picture
fix (dbvec): imports and data
10fc8ae
import numpy as np
from embeddings import model
# ==========================================
# Embedding Function (SQL-callable)
# ==========================================
def embed_symptoms(symptom_list):
    """
    Embed a list of symptom strings as a single vector.

    The symptoms are joined with single spaces into one query string, the
    string is passed to ``model.encode`` as a one-element batch, and the
    first row of the result is converted with ``.tolist()``.
    Callable directly from DuckDB SQL.

    Args:
        symptom_list: List of symptom strings, or None. None entries inside
            the list (presumably SQL NULL elements) are ignored.

    Returns:
        The ``.tolist()`` form of the encoded vector (presumably a list of
        floats -- depends on ``model``), or None when ``symptom_list`` is
        None, empty, or contains only None entries.
    """
    if symptom_list is None:
        return None

    # str.join raises TypeError on None items, so drop them before building
    # the query text instead of failing the whole call.
    symptoms = [symptom for symptom in symptom_list if symptom is not None]
    if not symptoms:
        return None

    query_text = " ".join(symptoms)
    # encode() receives a batch of one text; [0] selects its single row.
    vector = model.encode([query_text])[0]
    return vector.tolist()
# ==========================================
# Cosine Similarity Function (SQL-callable)
# ==========================================
def cosine(v1, v2):
    """
    Return the cosine similarity of two vectors as a Python float.

    Both inputs are converted with ``np.array``. If either vector has a
    Euclidean norm of zero the similarity is reported as 0.0 rather than
    dividing by zero.
    Callable directly from DuckDB SQL.
    """
    left, right = np.array(v1), np.array(v2)
    magnitudes = (np.linalg.norm(left), np.linalg.norm(right))

    # A zero-norm vector has no direction; short-circuit before the division.
    if any(magnitude == 0 for magnitude in magnitudes):
        return 0.0

    similarity = np.dot(left, right) / (magnitudes[0] * magnitudes[1])
    return float(similarity)
# ==========================================
# Registration Function
# ==========================================
def register_extensions(con):
    """
    Register this module's Python functions as DuckDB SQL UDFs.

    Calls ``con.create_function`` once per function, in order:
    ``embed_symptoms`` (return type ``DOUBLE[]``) and then ``cosine``
    (return type ``DOUBLE``).

    Args:
        con: Object exposing ``create_function(name, function, return_type=...)``
            -- presumably a DuckDB connection.
    """
    # (SQL name, Python callable, SQL return type) for each UDF.
    udf_specs = (
        ("embed_symptoms", embed_symptoms, "DOUBLE[]"),
        ("cosine", cosine, "DOUBLE"),
    )
    for sql_name, py_func, sql_return_type in udf_specs:
        con.create_function(sql_name, py_func, return_type=sql_return_type)