ALM-2 / backend /tests /init_supabase_database.py
ACA050's picture
Upload 520 files
2ed8996 verified
"""
Supabase Database Initialization Script for AegisLM
This script creates all necessary tables, indexes, and initial data
for the AegisLM platform on Supabase PostgreSQL.
"""
import asyncio
import sys
from pathlib import Path
import logging
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
# Add backend to path
backend_path = Path(__file__).parent
sys.path.insert(0, str(backend_path))
from core.config import settings
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
async def create_database_tables():
"""Create all database tables for AegisLM"""
logger.info("🚀 Starting Supabase Database Initialization")
# Create async engine
engine = create_async_engine(
settings.DATABASE_URL,
echo=True, # Show SQL for debugging
pool_pre_ping=True,
pool_recycle=3600,
pool_size=settings.DATABASE_POOL_SIZE,
max_overflow=settings.DATABASE_MAX_OVERFLOW,
)
# Create session factory
AsyncSessionLocal = sessionmaker(
engine, class_=AsyncSession, expire_on_commit=False
)
try:
async with AsyncSessionLocal() as session:
await session.begin()
# Create extensions
await create_extensions(session)
# Create tables in order
await create_users_table(session)
await create_experiments_table(session)
await create_datasets_table(session)
await create_audit_trails_table(session)
await create_subscriptions_table(session)
await create_invoices_table(session)
await create_payment_methods_table(session)
await create_benchmarks_table(session)
await create_analytics_table(session)
await create_notifications_table(session)
await create_system_logs_table(session)
# Create indexes
await create_indexes(session)
# Create initial data
await create_initial_data(session)
await session.commit()
logger.info("✅ All tables created successfully!")
except Exception as e:
logger.error(f"❌ Database initialization failed: {e}")
await session.rollback()
raise
finally:
await engine.dispose()
async def create_extensions(session: AsyncSession):
"""Create necessary PostgreSQL extensions"""
logger.info("📋 Creating PostgreSQL extensions...")
extensions = [
"CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\"",
"CREATE EXTENSION IF NOT EXISTS \"pg_trgm\"",
"CREATE EXTENSION IF NOT EXISTS \"btree_gin\"",
"CREATE EXTENSION IF NOT EXISTS \"btree_gist\"",
]
for ext in extensions:
try:
await session.execute(text(ext))
logger.info(f"✅ Extension created: {ext}")
except Exception as e:
logger.warning(f"⚠️ Extension creation warning: {e}")
async def create_users_table(session: AsyncSession):
"""Create users table"""
logger.info("👥 Creating users table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS users (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
full_name VARCHAR(255),
company VARCHAR(255),
is_active BOOLEAN DEFAULT TRUE,
is_verified BOOLEAN DEFAULT FALSE,
is_premium BOOLEAN DEFAULT FALSE,
role VARCHAR(50) DEFAULT 'user',
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
last_login TIMESTAMP WITH TIME ZONE,
email_verified_at TIMESTAMP WITH TIME ZONE,
phone VARCHAR(50),
avatar_url TEXT,
preferences JSONB DEFAULT '{}',
metadata JSONB DEFAULT '{}',
subscription_id INTEGER REFERENCES subscriptions(id),
stripe_customer_id VARCHAR(255)
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Users table created")
async def create_experiments_table(session: AsyncSession):
"""Create experiments table"""
logger.info("🧪 Creating experiments table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS experiments (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
run_id UUID UNIQUE NOT NULL,
experiment_name VARCHAR(255) NOT NULL,
description TEXT,
config_snapshot JSONB NOT NULL,
status VARCHAR(50) DEFAULT 'pending',
priority INTEGER DEFAULT 5,
created_by INTEGER REFERENCES users(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
started_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE,
dataset_type VARCHAR(100),
dataset_version VARCHAR(50),
model_name VARCHAR(255),
parameters JSONB DEFAULT '{}',
results JSONB DEFAULT '{}',
metrics JSONB DEFAULT '{}',
error_message TEXT,
retry_count INTEGER DEFAULT 0,
max_retries INTEGER DEFAULT 3,
tags TEXT[],
is_public BOOLEAN DEFAULT FALSE,
archived_at TIMESTAMP WITH TIME ZONE
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Experiments table created")
async def create_datasets_table(session: AsyncSession):
"""Create datasets table"""
logger.info("📊 Creating datasets table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS datasets (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
name VARCHAR(255) NOT NULL,
description TEXT,
type VARCHAR(100) NOT NULL,
version VARCHAR(50) NOT NULL,
file_path TEXT,
file_size BIGINT,
file_hash VARCHAR(255),
format VARCHAR(50) DEFAULT 'json',
schema JSONB DEFAULT '{}',
statistics JSONB DEFAULT '{}',
created_by INTEGER REFERENCES users(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
is_public BOOLEAN DEFAULT FALSE,
download_count INTEGER DEFAULT 0,
tags TEXT[],
metadata JSONB DEFAULT '{}',
quality_score DECIMAL(3,2),
validation_status VARCHAR(50) DEFAULT 'pending'
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Datasets table created")
async def create_audit_trails_table(session: AsyncSession):
"""Create audit trails table"""
logger.info("🔍 Creating audit trails table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS audit_trails (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
experiment_id UUID REFERENCES experiments(uuid),
user_id INTEGER REFERENCES users(id),
action VARCHAR(100) NOT NULL,
entity_type VARCHAR(100),
entity_id VARCHAR(255),
old_values JSONB,
new_values JSONB,
config_hash VARCHAR(255),
result_checksum VARCHAR(255),
deterministic_seed INTEGER,
timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
ip_address INET,
user_agent TEXT,
session_id VARCHAR(255),
metadata JSONB DEFAULT '{}'
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Audit trails table created")
async def create_subscriptions_table(session: AsyncSession):
"""Create subscriptions table"""
logger.info("💳 Creating subscriptions table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS subscriptions (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
user_id INTEGER REFERENCES users(id),
plan_name VARCHAR(100) NOT NULL,
stripe_subscription_id VARCHAR(255) UNIQUE,
status VARCHAR(50) DEFAULT 'active',
current_period_start TIMESTAMP WITH TIME ZONE,
current_period_end TIMESTAMP WITH TIME ZONE,
trial_end TIMESTAMP WITH TIME ZONE,
canceled_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
amount DECIMAL(10,2),
currency VARCHAR(3) DEFAULT 'USD',
interval VARCHAR(20) DEFAULT 'month',
interval_count INTEGER DEFAULT 1,
metadata JSONB DEFAULT '{}',
auto_renew BOOLEAN DEFAULT TRUE
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Subscriptions table created")
async def create_invoices_table(session: AsyncSession):
"""Create invoices table"""
logger.info("🧾 Creating invoices table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS invoices (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
user_id INTEGER REFERENCES users(id),
subscription_id INTEGER REFERENCES subscriptions(id),
stripe_invoice_id VARCHAR(255) UNIQUE,
number VARCHAR(255),
status VARCHAR(50) DEFAULT 'draft',
amount DECIMAL(10,2) NOT NULL,
currency VARCHAR(3) DEFAULT 'USD',
due_date TIMESTAMP WITH TIME ZONE,
paid_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
description TEXT,
line_items JSONB DEFAULT '[]',
metadata JSONB DEFAULT '{}',
attempt_count INTEGER DEFAULT 0,
next_payment_attempt TIMESTAMP WITH TIME ZONE
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Invoices table created")
async def create_payment_methods_table(session: AsyncSession):
"""Create payment methods table"""
logger.info("💰 Creating payment methods table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS payment_methods (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
user_id INTEGER REFERENCES users(id),
stripe_payment_method_id VARCHAR(255) UNIQUE,
type VARCHAR(50) NOT NULL,
brand VARCHAR(50),
last4 VARCHAR(4),
exp_month INTEGER,
exp_year INTEGER,
is_default BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
metadata JSONB DEFAULT '{}'
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Payment methods table created")
async def create_benchmarks_table(session: AsyncSession):
"""Create benchmarks table"""
logger.info("📈 Creating benchmarks table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS benchmarks (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
name VARCHAR(255) NOT NULL,
description TEXT,
dataset_type VARCHAR(100),
dataset_version VARCHAR(50),
models TEXT[] NOT NULL,
metrics JSONB DEFAULT '{}',
results JSONB DEFAULT '{}',
created_by INTEGER REFERENCES users(id),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
is_public BOOLEAN DEFAULT FALSE,
status VARCHAR(50) DEFAULT 'active',
tags TEXT[],
metadata JSONB DEFAULT '{}',
leaderboard JSONB DEFAULT '{}'
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Benchmarks table created")
async def create_analytics_table(session: AsyncSession):
"""Create analytics table"""
logger.info("📊 Creating analytics table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS analytics (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
user_id INTEGER REFERENCES users(id),
event_type VARCHAR(100) NOT NULL,
event_data JSONB DEFAULT '{}',
session_id VARCHAR(255),
ip_address INET,
user_agent TEXT,
timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
metadata JSONB DEFAULT '{}'
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Analytics table created")
async def create_notifications_table(session: AsyncSession):
"""Create notifications table"""
logger.info("🔔 Creating notifications table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS notifications (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
user_id INTEGER REFERENCES users(id),
type VARCHAR(100) NOT NULL,
title VARCHAR(255) NOT NULL,
message TEXT NOT NULL,
data JSONB DEFAULT '{}',
is_read BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
read_at TIMESTAMP WITH TIME ZONE,
expires_at TIMESTAMP WITH TIME ZONE,
priority VARCHAR(20) DEFAULT 'normal',
channels TEXT[] DEFAULT ARRAY['in_app']
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ Notifications table created")
async def create_system_logs_table(session: AsyncSession):
"""Create system logs table"""
logger.info("📝 Creating system logs table...")
create_table_sql = """
CREATE TABLE IF NOT EXISTS system_logs (
id SERIAL PRIMARY KEY,
uuid UUID DEFAULT uuid_generate_v4() UNIQUE NOT NULL,
level VARCHAR(20) NOT NULL,
message TEXT NOT NULL,
module VARCHAR(100),
function VARCHAR(100),
line_number INTEGER,
stack_trace TEXT,
user_id INTEGER REFERENCES users(id),
session_id VARCHAR(255),
ip_address INET,
user_agent TEXT,
timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
metadata JSONB DEFAULT '{}'
);
"""
await session.execute(text(create_table_sql))
logger.info("✅ System logs table created")
async def create_indexes(session: AsyncSession):
"""Create performance indexes"""
logger.info("🔍 Creating indexes...")
indexes = [
# Users indexes
"CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);",
"CREATE INDEX IF NOT EXISTS idx_users_uuid ON users(uuid);",
"CREATE INDEX IF NOT EXISTS idx_users_active ON users(is_active) WHERE is_active = TRUE;",
# Experiments indexes
"CREATE INDEX IF NOT EXISTS idx_experiments_run_id ON experiments(run_id);",
"CREATE INDEX IF NOT EXISTS idx_experiments_user_id ON experiments(created_by);",
"CREATE INDEX IF NOT EXISTS idx_experiments_status ON experiments(status);",
"CREATE INDEX IF NOT EXISTS idx_experiments_created_at ON experiments(created_at);",
"CREATE INDEX IF NOT EXISTS idx_experiments_dataset ON experiments(dataset_type, dataset_version);",
# Datasets indexes
"CREATE INDEX IF NOT EXISTS idx_datasets_type_version ON datasets(type, version);",
"CREATE INDEX IF NOT EXISTS idx_datasets_user_id ON datasets(created_by);",
"CREATE INDEX IF NOT EXISTS idx_datasets_public ON datasets(is_public) WHERE is_public = TRUE;",
# Audit trails indexes
"CREATE INDEX IF NOT EXISTS idx_audit_experiments ON audit_trails(experiment_id);",
"CREATE INDEX IF NOT EXISTS idx_audit_user_id ON audit_trails(user_id);",
"CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON audit_trails(timestamp);",
# Subscriptions indexes
"CREATE INDEX IF NOT EXISTS idx_subscriptions_user_id ON subscriptions(user_id);",
"CREATE INDEX IF NOT EXISTS idx_subscriptions_stripe_id ON subscriptions(stripe_subscription_id);",
"CREATE INDEX IF NOT EXISTS idx_subscriptions_status ON subscriptions(status);",
# Analytics indexes
"CREATE INDEX IF NOT EXISTS idx_analytics_user_id ON analytics(user_id);",
"CREATE INDEX IF NOT EXISTS idx_analytics_event_type ON analytics(event_type);",
"CREATE INDEX IF NOT EXISTS idx_analytics_timestamp ON analytics(timestamp);",
# Notifications indexes
"CREATE INDEX IF NOT EXISTS idx_notifications_user_id ON notifications(user_id);",
"CREATE INDEX IF NOT EXISTS idx_notifications_unread ON notifications(user_id) WHERE is_read = FALSE;",
# System logs indexes
"CREATE INDEX IF NOT EXISTS idx_logs_level ON system_logs(level);",
"CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON system_logs(timestamp);",
"CREATE INDEX IF NOT EXISTS idx_logs_module ON system_logs(module);",
]
for index_sql in indexes:
try:
await session.execute(text(index_sql))
logger.info(f"✅ Index created: {index_sql.split('idx_')[1].split(' ')[0]}")
except Exception as e:
logger.warning(f"⚠️ Index creation warning: {e}")
async def create_initial_data(session: AsyncSession):
"""Create initial data"""
logger.info("🌱 Creating initial data...")
# Create default admin user (password: admin123)
try:
from core.security import get_password_hash
admin_password_hash = get_password_hash("admin123")
insert_admin_sql = """
INSERT INTO users (email, password_hash, full_name, role, is_active, is_verified, is_premium)
VALUES ('admin@aegislm.com', :password_hash, 'System Admin', 'admin', TRUE, TRUE, TRUE)
ON CONFLICT (email) DO NOTHING;
"""
await session.execute(text(insert_admin_sql), {"password_hash": admin_password_hash})
logger.info("✅ Admin user created")
except Exception as e:
logger.warning(f"⚠️ Admin user creation warning: {e}")
# Create default subscription plans
try:
plans = [
("Free", 0, "month", 1, "Basic features for individuals"),
("Pro", 29.99, "month", 1, "Professional features for teams"),
("Enterprise", 99.99, "month", 1, "Advanced features for organizations")
]
for plan_name, amount, interval, interval_count, description in plans:
insert_plan_sql = """
INSERT INTO subscriptions (plan_name, amount, currency, interval, interval_count, status)
VALUES (:plan_name, :amount, 'USD', :interval, :interval_count, 'active')
ON CONFLICT (plan_name) DO NOTHING;
"""
await session.execute(text(insert_plan_sql), {
"plan_name": plan_name,
"amount": amount,
"interval": interval,
"interval_count": interval_count
})
logger.info("✅ Default subscription plans created")
except Exception as e:
logger.warning(f"⚠️ Subscription plans creation warning: {e}")
async def verify_database_setup():
"""Verify database setup"""
logger.info("🔍 Verifying database setup...")
engine = create_async_engine(settings.DATABASE_URL)
AsyncSessionLocal = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
try:
async with AsyncSessionLocal() as session:
# Check tables exist
tables_query = """
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public'
ORDER BY table_name;
"""
result = await session.execute(text(tables_query))
tables = [row[0] for row in result.fetchall()]
expected_tables = [
'users', 'experiments', 'datasets', 'audit_trails',
'subscriptions', 'invoices', 'payment_methods',
'benchmarks', 'analytics', 'notifications', 'system_logs'
]
logger.info("📋 Tables created:")
for table in tables:
status = "✅" if table in expected_tables else "⚠️"
logger.info(f" {status} {table}")
missing_tables = [t for t in expected_tables if t not in tables]
if missing_tables:
logger.error(f"❌ Missing tables: {missing_tables}")
else:
logger.info("✅ All expected tables created successfully!")
except Exception as e:
logger.error(f"❌ Database verification failed: {e}")
finally:
await engine.dispose()
async def main():
"""Main initialization function"""
try:
await create_database_tables()
await verify_database_setup()
logger.info("🎉 Supabase database initialization completed successfully!")
except Exception as e:
logger.error(f"❌ Database initialization failed: {e}")
sys.exit(1)
if __name__ == "__main__":
asyncio.run(main())