ProjectMemory / backend /app /vectorstore.py
Amal Nimmy Lal
feat : Project Memory
35765b5
import sqlite3
import sqlite_vec
import struct
from typing import Optional
import os
# Database path - same as main SQLite database
DB_PATH = os.path.join(os.path.dirname(__file__), "..", "project_memory.db")
def _serialize_vector(vec: list[float]) -> bytes:
"""Convert list of floats to bytes for sqlite-vec."""
return struct.pack(f'{len(vec)}f', *vec)
def _get_connection():
"""Get SQLite connection with sqlite-vec loaded."""
conn = sqlite3.connect(DB_PATH)
conn.enable_load_extension(True)
sqlite_vec.load(conn)
conn.enable_load_extension(False)
return conn
def init_vectorstore():
"""Initialize the vector table. Call once at startup."""
conn = _get_connection()
# Metadata table for embeddings
conn.execute("""
CREATE TABLE IF NOT EXISTS embeddings (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL,
user_id TEXT,
task_id TEXT,
text TEXT,
created_at TEXT
)
""")
# Create index for faster project filtering
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_embeddings_project
ON embeddings(project_id)
""")
# Create virtual table for vector search (768 dims for Gemini)
conn.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS vec_embeddings USING vec0(
id TEXT PRIMARY KEY,
embedding FLOAT[768]
)
""")
conn.commit()
conn.close()
def add_embedding(
log_entry_id: str,
text: str,
embedding: list[float],
metadata: dict
) -> None:
"""Store embedding with metadata."""
conn = _get_connection()
# Store metadata
conn.execute("""
INSERT OR REPLACE INTO embeddings (id, project_id, user_id, task_id, text, created_at)
VALUES (?, ?, ?, ?, ?, ?)
""", (
log_entry_id,
metadata.get("project_id"),
metadata.get("user_id"),
metadata.get("task_id"),
text[:2000], # Truncate long text
metadata.get("created_at")
))
# Store vector
conn.execute("""
INSERT OR REPLACE INTO vec_embeddings (id, embedding)
VALUES (?, ?)
""", (log_entry_id, _serialize_vector(embedding)))
conn.commit()
conn.close()
def search(
query_embedding: list[float],
project_id: str,
n_results: int = 10,
filters: Optional[dict] = None
) -> list[dict]:
"""Search for similar documents within a project."""
conn = _get_connection()
# Vector similarity search with metadata filter
# sqlite-vec uses k parameter in the MATCH clause
query = """
SELECT
e.id,
e.project_id,
e.user_id,
e.task_id,
e.text,
e.created_at,
v.distance
FROM vec_embeddings v
JOIN embeddings e ON v.id = e.id
WHERE v.embedding MATCH ?
AND k = ?
AND e.project_id = ?
"""
params = [_serialize_vector(query_embedding), n_results * 2, project_id]
if filters:
if filters.get("user_id"):
query += " AND e.user_id = ?"
params.append(filters["user_id"])
# Date filters for time-based queries
if filters.get("date_from"):
date_from = filters["date_from"]
if hasattr(date_from, 'isoformat'):
date_from = date_from.isoformat()
query += " AND e.created_at >= ?"
params.append(date_from)
if filters.get("date_to"):
date_to = filters["date_to"]
if hasattr(date_to, 'isoformat'):
date_to = date_to.isoformat()
query += " AND e.created_at < ?"
params.append(date_to)
query += " ORDER BY v.distance LIMIT ?"
params.append(n_results)
results = conn.execute(query, params).fetchall()
conn.close()
return [
{
"id": row[0],
"metadata": {
"project_id": row[1],
"user_id": row[2],
"task_id": row[3],
"text": row[4],
"created_at": row[5]
},
"distance": row[6]
}
for row in results
]
def delete_by_project(project_id: str) -> None:
"""Delete all vectors for a project."""
conn = _get_connection()
# Get IDs to delete
ids = conn.execute(
"SELECT id FROM embeddings WHERE project_id = ?",
(project_id,)
).fetchall()
for (id_,) in ids:
conn.execute("DELETE FROM vec_embeddings WHERE id = ?", (id_,))
conn.execute("DELETE FROM embeddings WHERE project_id = ?", (project_id,))
conn.commit()
conn.close()
def count_embeddings(project_id: Optional[str] = None) -> int:
"""Count embeddings, optionally filtered by project."""
conn = _get_connection()
if project_id:
result = conn.execute(
"SELECT COUNT(*) FROM embeddings WHERE project_id = ?",
(project_id,)
).fetchone()
else:
result = conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()
conn.close()
return result[0]