Spaces:
Sleeping
Sleeping
| """ | |
| Database Migration - Initial Schema | |
| Create tables for documents, chunks, and queries. | |
| """ | |
| from alembic import op | |
| import sqlalchemy as sa | |
| from sqlalchemy.dialects import postgresql | |
| # revision identifiers | |
| revision = '001' | |
| down_revision = None | |
| branch_labels = None | |
| depends_on = None | |
| def upgrade() -> None: | |
| # Create documents table | |
| op.create_table( | |
| 'documents', | |
| sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True), | |
| sa.Column('title', sa.String(500), nullable=False), | |
| sa.Column('filename', sa.String(255), nullable=False), | |
| sa.Column('file_type', sa.String(50), nullable=False), | |
| sa.Column('file_size', sa.BigInteger(), nullable=False), | |
| sa.Column('storage_path', sa.String(1000), nullable=False), | |
| sa.Column('department', sa.String(100), nullable=False), | |
| sa.Column('status', sa.String(50), nullable=False, server_default='pending'), | |
| sa.Column('upload_session_id', sa.String(100), nullable=True), | |
| sa.Column('uploaded_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')), | |
| sa.Column('indexed_at', sa.DateTime(timezone=True), nullable=True), | |
| sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'), | |
| sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')), | |
| sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')), | |
| ) | |
| # Create indexes for documents | |
| op.create_index('ix_documents_title', 'documents', ['title']) | |
| op.create_index('ix_documents_file_type', 'documents', ['file_type']) | |
| op.create_index('ix_documents_department', 'documents', ['department']) | |
| op.create_index('ix_documents_status', 'documents', ['status']) | |
| op.create_index('ix_documents_department_status', 'documents', ['department', 'status']) | |
| op.create_index('ix_documents_created_at', 'documents', ['created_at']) | |
| # Create document_chunks table | |
| op.create_table( | |
| 'document_chunks', | |
| sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True), | |
| sa.Column('document_id', postgresql.UUID(as_uuid=True), nullable=False), | |
| sa.Column('chunk_index', sa.Integer(), nullable=False), | |
| sa.Column('content', sa.Text(), nullable=False), | |
| sa.Column('token_count', sa.Integer(), nullable=False), | |
| sa.Column('vector_id', sa.String(100), nullable=True), | |
| sa.Column('metadata', postgresql.JSONB(), nullable=False, server_default='{}'), | |
| sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')), | |
| ) | |
| # Create indexes for chunks | |
| op.create_index('ix_chunks_document_id', 'document_chunks', ['document_id']) | |
| op.create_index('ix_chunks_vector_id', 'document_chunks', ['vector_id']) | |
| op.create_index('ix_chunks_document_id_index', 'document_chunks', ['document_id', 'chunk_index']) | |
| # Create queries table | |
| op.create_table( | |
| 'queries', | |
| sa.Column('id', postgresql.UUID(as_uuid=True), primary_key=True), | |
| sa.Column('query_text', sa.Text(), nullable=False), | |
| sa.Column('department', sa.String(100), nullable=False), | |
| sa.Column('user_id', sa.String(100), nullable=True), | |
| sa.Column('session_id', sa.String(100), nullable=True), | |
| sa.Column('status', sa.String(50), nullable=False, server_default='pending'), | |
| sa.Column('answer', sa.Text(), nullable=True), | |
| sa.Column('sources', postgresql.JSONB(), nullable=False, server_default='[]'), | |
| sa.Column('confidence', sa.Integer(), nullable=False, server_default='0'), | |
| sa.Column('duration_ms', sa.Integer(), nullable=False, server_default='0'), | |
| sa.Column('tokens_used', sa.Integer(), nullable=False, server_default='0'), | |
| sa.Column('model', sa.String(100), nullable=True), | |
| sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('now()')), | |
| sa.Column('completed_at', sa.DateTime(timezone=True), nullable=True), | |
| ) | |
| # Create indexes for queries | |
| op.create_index('ix_queries_department', 'queries', ['department']) | |
| op.create_index('ix_queries_user_id', 'queries', ['user_id']) | |
| op.create_index('ix_queries_session_id', 'queries', ['session_id']) | |
| op.create_index('ix_queries_status', 'queries', ['status']) | |
| op.create_index('ix_queries_created_at', 'queries', ['created_at']) | |
| op.create_index('ix_queries_department_created', 'queries', ['department', 'created_at']) | |
| op.create_index('ix_queries_user_created', 'queries', ['user_id', 'created_at']) | |
| def downgrade() -> None: | |
| op.drop_table('queries') | |
| op.drop_table('document_chunks') | |
| op.drop_table('documents') | |