-- ============================================ -- MEXAR RAG Migration Script -- Run this in Supabase SQL Editor -- ============================================ -- 1. Enable pgvector extension (if not already) CREATE EXTENSION IF NOT EXISTS vector; -- 2. Clear existing chunks (required due to dimension change) DELETE FROM document_chunks; -- 3. Alter embedding dimension: 384 → 1024 ALTER TABLE document_chunks ALTER COLUMN embedding TYPE vector(1024); -- 4. Add tsvector column for keyword search ALTER TABLE document_chunks ADD COLUMN IF NOT EXISTS content_tsvector TSVECTOR; -- 5. Add chunk metadata columns ALTER TABLE document_chunks ADD COLUMN IF NOT EXISTS chunk_index INTEGER, ADD COLUMN IF NOT EXISTS section_title TEXT, ADD COLUMN IF NOT EXISTS token_count INTEGER; -- 6. Create HNSW index for fast cosine similarity DROP INDEX IF EXISTS chunks_embedding_idx; DROP INDEX IF EXISTS chunks_embedding_hnsw; CREATE INDEX chunks_embedding_hnsw ON document_chunks USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); -- 7. Create GIN index for full-text search CREATE INDEX IF NOT EXISTS chunks_content_gin ON document_chunks USING GIN (content_tsvector); -- 8. Create trigger to auto-update tsvector CREATE OR REPLACE FUNCTION update_tsvector() RETURNS TRIGGER AS $$ BEGIN NEW.content_tsvector := to_tsvector('english', COALESCE(NEW.content, '')); RETURN NEW; END; $$ LANGUAGE plpgsql; DROP TRIGGER IF EXISTS tsvector_update ON document_chunks; CREATE TRIGGER tsvector_update BEFORE INSERT OR UPDATE ON document_chunks FOR EACH ROW EXECUTE FUNCTION update_tsvector(); -- 9. Add agent metadata columns for full Supabase storage ALTER TABLE agents ADD COLUMN IF NOT EXISTS knowledge_graph_json JSONB, ADD COLUMN IF NOT EXISTS domain_signature JSONB, ADD COLUMN IF NOT EXISTS prompt_analysis JSONB, ADD COLUMN IF NOT EXISTS compilation_stats JSONB, ADD COLUMN IF NOT EXISTS chunk_count INTEGER DEFAULT 0; -- 10. Update existing tsvector data UPDATE document_chunks SET content_tsvector = to_tsvector('english', content) WHERE content_tsvector IS NULL; -- 11. Create hybrid search function CREATE OR REPLACE FUNCTION hybrid_search( query_embedding vector(1024), query_text text, target_agent_id integer, match_count integer DEFAULT 20 ) RETURNS TABLE ( id integer, content text, source text, semantic_rank integer, keyword_rank integer, rrf_score float ) AS $$ BEGIN RETURN QUERY WITH semantic AS ( SELECT dc.id, dc.content, dc.source, ROW_NUMBER() OVER (ORDER BY dc.embedding <=> query_embedding)::integer as rank FROM document_chunks dc WHERE dc.agent_id = target_agent_id ORDER BY dc.embedding <=> query_embedding LIMIT match_count ), keyword AS ( SELECT dc.id, dc.content, dc.source, ROW_NUMBER() OVER (ORDER BY ts_rank(dc.content_tsvector, plainto_tsquery('english', query_text)) DESC)::integer as rank FROM document_chunks dc WHERE dc.agent_id = target_agent_id AND dc.content_tsvector @@ plainto_tsquery('english', query_text) LIMIT match_count ) SELECT COALESCE(s.id, k.id) as id, COALESCE(s.content, k.content) as content, COALESCE(s.source, k.source) as source, s.rank as semantic_rank, k.rank as keyword_rank, (COALESCE(1.0/(60 + s.rank), 0) + COALESCE(1.0/(60 + k.rank), 0))::float as rrf_score FROM semantic s FULL OUTER JOIN keyword k ON s.id = k.id ORDER BY rrf_score DESC LIMIT match_count; END; $$ LANGUAGE plpgsql; -- Done! Verify with: -- SELECT * FROM pg_indexes WHERE tablename = 'document_chunks';