# feat: Add AI metadata extraction, latency prediction, context-aware routing,
# and tool output schemas
# commit: d1e5882
"""
Database Migration Script: Add Metadata Support

This script updates the documents table to add:
- metadata (JSONB) column for storing extracted metadata
- doc_id (TEXT) column for grouping chunks from the same document

Run this script after deploying the metadata extraction feature.
"""
import os
import sys
from pathlib import Path

# Make the project root importable before pulling in backend modules.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from dotenv import load_dotenv
import psycopg2

load_dotenv()

# Connection string is read once at import time from the environment.
DATABASE_URL = os.getenv("POSTGRESQL_URL")
def get_connection():
    """Open and return a new direct PostgreSQL connection.

    Raises:
        ValueError: if POSTGRESQL_URL was not set in the environment.
    """
    # Guard clause: fail fast with an actionable message when unconfigured.
    if DATABASE_URL:
        return psycopg2.connect(DATABASE_URL)
    raise ValueError(
        "PostgreSQL connection string not configured. "
        "Set POSTGRESQL_URL in your .env file."
    )
def migrate_database():
    """
    Add metadata and doc_id columns to the documents table.

    The migration is idempotent: existing columns are detected and skipped,
    and index creation uses IF NOT EXISTS.

    Returns:
        bool: True on success, False if a required step failed.
    """
    print("Starting database migration: Adding metadata support...")
    conn = None
    cur = None
    try:
        conn = get_connection()
        cur = conn.cursor()

        # Check which target columns already exist so re-runs are no-ops.
        cur.execute("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name = 'documents'
            AND column_name IN ('metadata', 'doc_id');
        """)
        existing_columns = {row[0] for row in cur.fetchall()}

        if 'metadata' not in existing_columns:
            print("  -> Adding 'metadata' JSONB column...")
            cur.execute("""
                ALTER TABLE documents
                ADD COLUMN metadata JSONB;
            """)
            print("  [OK] 'metadata' column added successfully")
        else:
            print("  [OK] 'metadata' column already exists")

        if 'doc_id' not in existing_columns:
            print("  -> Adding 'doc_id' TEXT column...")
            cur.execute("""
                ALTER TABLE documents
                ADD COLUMN doc_id TEXT;
            """)
            print("  [OK] 'doc_id' column added successfully")
        else:
            print("  [OK] 'doc_id' column already exists")

        # Commit the column additions before the best-effort index creation:
        # a failed index statement must not roll the new columns back.
        conn.commit()

        # Index creation is best-effort. NOTE: in PostgreSQL a failed
        # statement aborts the current transaction, so we roll back after
        # a failure before issuing further statements.
        try:
            print("  -> Creating index on 'doc_id'...")
            cur.execute("""
                CREATE INDEX IF NOT EXISTS documents_doc_id_idx
                ON documents (doc_id);
            """)
            conn.commit()
            print("  [OK] Index on 'doc_id' created successfully")
        except Exception as e:
            conn.rollback()
            print(f"  [WARN] Index creation skipped (may already exist): {e}")

        try:
            print("  -> Creating GIN index on 'metadata'...")
            cur.execute("""
                CREATE INDEX IF NOT EXISTS documents_metadata_idx
                ON documents USING GIN (metadata);
            """)
            conn.commit()
            print("  [OK] GIN index on 'metadata' created successfully")
        except Exception as e:
            conn.rollback()
            print(f"  [WARN] GIN index creation skipped (may already exist): {e}")

        print("\n[OK] Database migration completed successfully!")
        print("\nThe documents table now supports:")
        print("  - metadata (JSONB): Stores extracted metadata (title, summary, tags, topics, etc.)")
        print("  - doc_id (TEXT): Groups chunks from the same document")
        print("\nNew documents will automatically have metadata extracted during ingestion.")
        return True
    except Exception as e:
        # Roll back any uncommitted DDL from a partial run.
        if conn is not None:
            conn.rollback()
        print(f"\n[FAIL] Migration failed: {e}")
        print("\nTroubleshooting:")
        print("  1. Ensure PostgreSQL is running")
        print("  2. Check POSTGRESQL_URL in your .env file")
        print("  3. Verify you have permissions to alter the table")
        print("  4. Check if the documents table exists")
        return False
    finally:
        # Always release resources — the original leaked the cursor and
        # connection whenever an exception was raised.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
def verify_migration():
    """
    Verify the migration by checking that both new columns exist.

    Returns:
        bool: True when both 'metadata' and 'doc_id' are present on the
        documents table, False otherwise (including on connection errors).
    """
    print("\nVerifying migration...")
    conn = None
    cur = None
    try:
        conn = get_connection()
        cur = conn.cursor()
        cur.execute("""
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_name = 'documents'
            AND column_name IN ('metadata', 'doc_id')
            ORDER BY column_name;
        """)
        columns = cur.fetchall()
        if len(columns) == 2:
            print("  [OK] Both columns exist:")
            for col_name, col_type in columns:
                print(f"    - {col_name}: {col_type}")
            return True
        print(f"  [WARN] Found {len(columns)} column(s), expected 2")
        for col_name, col_type in columns:
            print(f"    - {col_name}: {col_type}")
        return False
    except Exception as e:
        print(f"  [FAIL] Verification failed: {e}")
        return False
    finally:
        # Guarded cleanup: cur/conn stay None when get_connection() fails,
        # so this cannot raise NameError (the original relied on a bare
        # `except: pass` to mask that case).
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
if __name__ == "__main__":
    print("=" * 60)
    print("Database Migration: Add Metadata Support")
    print("=" * 60)
    print()

    # Smoke-test the connection before attempting any DDL.
    try:
        conn = get_connection()
        conn.close()
        print("[OK] Database connection successful\n")
    except Exception as e:
        print(f"[FAIL] Cannot connect to database: {e}")
        print("\nPlease check:")
        print("  1. PostgreSQL is running")
        print("  2. POSTGRESQL_URL is set in .env file")
        print("  3. Database credentials are correct")
        sys.exit(1)

    # Run the migration, then verify it only if it reported success.
    success = migrate_database()
    if success:
        verify_migration()
        print("\n" + "=" * 60)
        print("Migration completed! You can now use metadata extraction.")
        print("=" * 60)
    else:
        print("\n" + "=" * 60)
        print("Migration failed. Please check the errors above.")
        print("=" * 60)
        sys.exit(1)