File size: 2,689 Bytes
027123c 2ba0613 027123c 52999bc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | """Database initialization."""
from sqlalchemy import text
from src.db.postgres.connection import engine, Base
from src.db.postgres.models import (
ChatMessage,
DatabaseClient,
Document,
MessageSource,
Room,
User,
)
async def init_db():
"""Initialize database tables and required extensions."""
async with engine.begin() as conn:
# Create pgvector extension using two separate statements.
# Must NOT be combined into one string — asyncpg rejects multi-statement
# prepared statements (langchain_postgres bug workaround via create_extension=False).
await conn.execute(text("SELECT pg_advisory_xact_lock(1573678846307946496)"))
await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
# Create application tables
await conn.run_sync(Base.metadata.create_all)
# Schema migrations (idempotent — safe to run on every startup)
await conn.execute(text(
"ALTER TABLE rooms ADD COLUMN IF NOT EXISTS status VARCHAR NOT NULL DEFAULT 'active'"
))
# HNSW index for fast approximate vector similarity search
# Only created when the embedding column has explicit dimensions (HNSW requirement).
# atttypmod > 0 means the vector column was created with a dimension (e.g. vector(1536));
# atttypmod = -1 means dimensionless — HNSW would fail with "column does not have dimensions".
await conn.execute(text("""
DO $$
BEGIN
IF EXISTS (
SELECT FROM pg_attribute a
JOIN pg_class c ON c.oid = a.attrelid
WHERE c.relname = 'langchain_pg_embedding'
AND a.attname = 'embedding'
AND a.atttypmod > 0
) THEN
CREATE INDEX IF NOT EXISTS idx_langchain_pg_embedding_hnsw
ON langchain_pg_embedding USING hnsw (embedding vector_cosine_ops);
END IF;
END $$
"""))
# GIN index for FTS on schema chunks — only created if table exists
# (langchain_pg_embedding is created by PGVector on first use, not by create_all)
await conn.execute(text("""
DO $$
BEGIN
IF EXISTS (
SELECT FROM information_schema.tables
WHERE table_name = 'langchain_pg_embedding'
) THEN
CREATE INDEX IF NOT EXISTS idx_langchain_pg_embedding_fts
ON langchain_pg_embedding USING GIN (to_tsvector('english', document));
END IF;
END $$
"""))
|