| import numpy as np | |
| from embeddings import model | |
| # ========================================== | |
| # Embedding Function (SQL-callable) | |
| # ========================================== | |
def embed_symptoms(symptom_list):
    """
    Takes a list of symptom strings and returns embedding vector.
    Callable directly from DuckDB SQL.

    Args:
        symptom_list: Iterable of symptom strings, or None. None entries
            inside the list (e.g. SQL NULL elements) are ignored.

    Returns:
        The embedding as a plain Python list, or None when the input is
        None, empty, or contains only None entries.
    """
    if symptom_list is None:
        return None

    # Drop NULL elements up front: str.join raises TypeError on None, which
    # would otherwise fail the whole query because of a single missing entry.
    symptoms = [symptom for symptom in symptom_list if symptom is not None]
    if not symptoms:
        return None

    query_text = " ".join(symptoms)
    # The symptoms are encoded together as one sentence; encode() is given a
    # one-element batch and only that first row is kept.
    # NOTE(review): assumes model.encode returns rows exposing .tolist()
    # (e.g. NumPy arrays) -- confirm against the embeddings module.
    vector = model.encode([query_text])[0]
    return vector.tolist()
| # ========================================== | |
| # Cosine Similarity Function (SQL-callable) | |
| # ========================================== | |
def cosine(v1, v2):
    """
    Cosine similarity between two vectors.
    Callable directly from DuckDB SQL.

    Args:
        v1: First vector (sequence of numbers), or None.
        v2: Second vector (sequence of numbers), or None.

    Returns:
        The similarity as a float clipped to [-1.0, 1.0]; 0.0 when either
        vector has zero magnitude (this includes empty vectors); None when
        either input is None, so a missing embedding propagates as a null
        result instead of raising.

    Raises:
        ValueError: If an input cannot be converted to a float array or the
            two shapes are incompatible for ``np.dot``.
    """
    if v1 is None or v2 is None:
        return None

    # Convert to float64 explicitly: integer inputs would otherwise keep an
    # integer dtype and np.dot could silently overflow, while the norms are
    # computed in floating point -- producing an inconsistent ratio.
    a = np.asarray(v1, dtype=float)
    b = np.asarray(v2, dtype=float)

    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        # Similarity is undefined for a zero vector; report "no similarity".
        return 0.0

    similarity = np.dot(a, b) / (norm_a * norm_b)
    # Floating-point rounding can push the ratio marginally outside [-1, 1];
    # np.clip keeps it in range (and still propagates NaN unchanged).
    return float(np.clip(similarity, -1.0, 1.0))
| # ========================================== | |
| # Registration Function | |
| # ========================================== | |
def register_extensions(con):
    """
    Expose this module's Python helpers as DuckDB SQL UDFs on ``con``.

    Calls ``con.create_function`` once per helper, in this order:
    ``embed_symptoms`` (declared return type ``DOUBLE[]``), then
    ``cosine`` (declared return type ``DOUBLE``).
    """
    # (SQL name, Python callable, SQL return type) for every UDF to register.
    udf_specs = (
        ("embed_symptoms", embed_symptoms, "DOUBLE[]"),
        ("cosine", cosine, "DOUBLE"),
    )
    for sql_name, py_func, sql_return_type in udf_specs:
        con.create_function(sql_name, py_func, return_type=sql_return_type)