File size: 2,689 Bytes
027123c
 
 
 
2ba0613
 
 
 
 
 
 
 
027123c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767625e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""Database initialization."""

from sqlalchemy import text
from src.db.postgres.connection import engine, Base
from src.db.postgres.models import (
    ChatMessage,
    DatabaseClient,
    Document,
    MessageSource,
    Room,
    User,
)


async def init_db():
    """Initialize database tables and required extensions."""
    async with engine.begin() as conn:
        # Create pgvector extension using two separate statements.
        # Must NOT be combined into one string — asyncpg rejects multi-statement
        # prepared statements (langchain_postgres bug workaround via create_extension=False).
        await conn.execute(text("SELECT pg_advisory_xact_lock(1573678846307946496)"))
        await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))

        # Create application tables
        await conn.run_sync(Base.metadata.create_all)

        # Schema migrations (idempotent — safe to run on every startup)
        await conn.execute(text(
            "ALTER TABLE rooms ADD COLUMN IF NOT EXISTS status VARCHAR NOT NULL DEFAULT 'active'"
        ))

        # HNSW index for fast approximate vector similarity search
        # Only created when the embedding column has explicit dimensions (HNSW requirement).
        # atttypmod > 0 means the vector column was created with a dimension (e.g. vector(1536));
        # atttypmod = -1 means dimensionless — HNSW would fail with "column does not have dimensions".
        await conn.execute(text("""
            DO $$
            BEGIN
                IF EXISTS (
                    SELECT FROM pg_attribute a
                    JOIN pg_class c ON c.oid = a.attrelid
                    WHERE c.relname = 'langchain_pg_embedding'
                      AND a.attname = 'embedding'
                      AND a.atttypmod > 0
                ) THEN
                    CREATE INDEX IF NOT EXISTS idx_langchain_pg_embedding_hnsw
                    ON langchain_pg_embedding USING hnsw (embedding vector_cosine_ops);
                END IF;
            END $$
        """))

        # GIN index for FTS on schema chunks — only created if table exists
        # (langchain_pg_embedding is created by PGVector on first use, not by create_all)
        await conn.execute(text("""
            DO $$
            BEGIN
                IF EXISTS (
                    SELECT FROM information_schema.tables
                    WHERE table_name = 'langchain_pg_embedding'
                ) THEN
                    CREATE INDEX IF NOT EXISTS idx_langchain_pg_embedding_fts
                    ON langchain_pg_embedding USING GIN (to_tsvector('english', document));
                END IF;
            END $$
        """))