rag-onboarding-backend / alembic /versions /001_initial_schema.py
Baktabek's picture
Upload folder using huggingface_hub
409c17a verified
"""
Database Migration - Initial Schema
Create tables for documents, chunks, and queries.
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers
revision = '001'
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create documents table
op.create_table(
'documents',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('title', sa.String(500), nullable=False),
sa.Column('filename', sa.String(255), nullable=False),
sa.Column('file_type', sa.String(50), nullable=False),
sa.Column('file_size', sa.BigInteger(), nullable=False),
sa.Column('storage_path', sa.String(1000), nullable=False),
sa.Column('department', sa.String(100), nullable=False),
sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
sa.Column('upload_session_id', sa.String(100), nullable=True),
sa.Column('uploaded_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
sa.Column('indexed_at', sa.DateTime(timezone=True), nullable=True),
sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
)
# Create indexes for documents
op.create_index('ix_documents_title', 'documents', ['title'])
op.create_index('ix_documents_file_type', 'documents', ['file_type'])
op.create_index('ix_documents_department', 'documents', ['department'])
op.create_index('ix_documents_status', 'documents', ['status'])
op.create_index('ix_documents_department_status', 'documents', ['department', 'status'])
op.create_index('ix_documents_created_at', 'documents', ['created_at'])
# Create document_chunks table
op.create_table(
'document_chunks',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('document_id', postgresql.UUID(as_uuid=True), nullable=False),
sa.Column('chunk_index', sa.Integer(), nullable=False),
sa.Column('content', sa.Text(), nullable=False),
sa.Column('token_count', sa.Integer(), nullable=False),
sa.Column('vector_id', sa.String(100), nullable=True),
sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
)
# Create indexes for chunks
op.create_index('ix_chunks_document_id', 'document_chunks', ['document_id'])
op.create_index('ix_chunks_vector_id', 'document_chunks', ['vector_id'])
op.create_index('ix_chunks_document_id_index', 'document_chunks', ['document_id', 'chunk_index'])
# Create queries table
op.create_table(
'queries',
sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column('query_text', sa.Text(), nullable=False),
sa.Column('department', sa.String(100), nullable=False),
sa.Column('user_id', sa.String(100), nullable=True),
sa.Column('session_id', sa.String(100), nullable=True),
sa.Column('status', sa.String(50), nullable=False, server_default='pending'),
sa.Column('answer', sa.Text(), nullable=True),
sa.Column('sources', postgresql.JSONB(), nullable=False, server_default='[]'),
sa.Column('confidence', sa.Integer(), nullable=False, server_default='0'),
sa.Column('duration_ms', sa.Integer(), nullable=False, server_default='0'),
sa.Column('tokens_used', sa.Integer(), nullable=False, server_default='0'),
sa.Column('model', sa.String(100), nullable=True),
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')),
sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True),
)
# Create indexes for queries
op.create_index('ix_queries_department', 'queries', ['department'])
op.create_index('ix_queries_user_id', 'queries', ['user_id'])
op.create_index('ix_queries_session_id', 'queries', ['session_id'])
op.create_index('ix_queries_status', 'queries', ['status'])
op.create_index('ix_queries_created_at', 'queries', ['created_at'])
op.create_index('ix_queries_department_created', 'queries', ['department', 'created_at'])
op.create_index('ix_queries_user_created', 'queries', ['user_id', 'created_at'])
def downgrade() -> None:
op.drop_table('queries')
op.drop_table('document_chunks')
op.drop_table('documents')