MorphGuard / database_init /02_training_metrics_schema.sql
juanquy's picture
Initial clean commit of modular MorphGuard
2978bba
Raw
History Blame Contribute Delete
4.75 kB
-- Enhanced MorphGuard Database Schema for Training Metrics
-- Adds comprehensive training metrics collection to TimescaleDB
-- Training sessions table
CREATE TABLE IF NOT EXISTS training_sessions (
session_id UUID PRIMARY KEY,
model_type TEXT NOT NULL, -- 'detector' or 'demorpher'
model_name TEXT NOT NULL,
start_time TIMESTAMPTZ NOT NULL,
end_time TIMESTAMPTZ,
total_epochs INTEGER,
batch_size INTEGER,
learning_rate FLOAT,
optimizer TEXT,
scheduler TEXT,
loss_function TEXT,
dataset_size INTEGER,
validation_size INTEGER,
gpu_model TEXT,
pytorch_version TEXT,
cuda_version TEXT,
final_train_loss FLOAT,
final_val_loss FLOAT,
final_val_accuracy FLOAT,
best_val_accuracy FLOAT,
best_val_loss FLOAT,
total_training_time_seconds FLOAT,
status TEXT DEFAULT 'running', -- 'running', 'completed', 'failed'
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Training epoch metrics - detailed per-epoch tracking
CREATE TABLE IF NOT EXISTS training_epoch_metrics (
time TIMESTAMPTZ NOT NULL,
session_id UUID REFERENCES training_sessions(session_id),
epoch INTEGER NOT NULL,
train_loss FLOAT,
val_loss FLOAT,
val_accuracy FLOAT,
val_precision FLOAT,
val_recall FLOAT,
val_f1_score FLOAT,
learning_rate FLOAT,
epoch_time_seconds FLOAT,
gpu_memory_peak_gb FLOAT,
gpu_utilization_avg FLOAT,
cpu_utilization_avg FLOAT,
PRIMARY KEY (time, session_id, epoch)
);
-- Training batch metrics - high-frequency training monitoring
CREATE TABLE IF NOT EXISTS training_batch_metrics (
time TIMESTAMPTZ NOT NULL,
session_id UUID REFERENCES training_sessions(session_id),
epoch INTEGER NOT NULL,
batch INTEGER NOT NULL,
batch_loss FLOAT,
batch_time_ms FLOAT,
gpu_memory_gb FLOAT,
gpu_utilization FLOAT,
cpu_utilization FLOAT,
gpu_temperature FLOAT,
PRIMARY KEY (time, session_id, epoch, batch)
);
-- Model performance benchmarks
CREATE TABLE IF NOT EXISTS model_benchmarks (
time TIMESTAMPTZ NOT NULL,
model_type TEXT NOT NULL,
model_name TEXT NOT NULL,
session_id UUID REFERENCES training_sessions(session_id),
inference_time_ms FLOAT,
batch_size INTEGER,
input_size TEXT, -- e.g., "224x224x3"
accuracy FLOAT,
precision FLOAT,
recall FLOAT,
f1_score FLOAT,
gpu_model TEXT,
benchmark_type TEXT -- 'training_end', 'validation', 'production'
);
-- Convert to TimescaleDB hypertables
SELECT create_hypertable('training_epoch_metrics', 'time', if_not_exists => TRUE);
SELECT create_hypertable('training_batch_metrics', 'time', if_not_exists => TRUE);
SELECT create_hypertable('model_benchmarks', 'time', if_not_exists => TRUE);
-- Create indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_training_sessions_type_time ON training_sessions (model_type, start_time DESC);
CREATE INDEX IF NOT EXISTS idx_training_sessions_status ON training_sessions (status, start_time DESC);
CREATE INDEX IF NOT EXISTS idx_epoch_metrics_session ON training_epoch_metrics (session_id, epoch);
CREATE INDEX IF NOT EXISTS idx_batch_metrics_session ON training_batch_metrics (session_id, epoch, batch);
CREATE INDEX IF NOT EXISTS idx_benchmarks_model ON model_benchmarks (model_type, model_name, time DESC);
-- Retention policies for training data
SELECT add_retention_policy('training_epoch_metrics', INTERVAL '1 year', if_not_exists => TRUE);
SELECT add_retention_policy('training_batch_metrics', INTERVAL '3 months', if_not_exists => TRUE);
SELECT add_retention_policy('model_benchmarks', INTERVAL '2 years', if_not_exists => TRUE);
-- Views for easy monitoring
CREATE OR REPLACE VIEW training_summary AS
SELECT
ts.session_id,
ts.model_type,
ts.model_name,
ts.start_time,
ts.end_time,
ts.status,
ts.total_epochs,
ts.best_val_accuracy,
ts.best_val_loss,
ts.total_training_time_seconds / 3600.0 as training_hours,
CASE
WHEN ts.end_time IS NOT NULL THEN ts.end_time - ts.start_time
ELSE NOW() - ts.start_time
END as session_duration
FROM training_sessions ts
ORDER BY ts.start_time DESC;
CREATE OR REPLACE VIEW latest_training_metrics AS
SELECT
tem.session_id,
ts.model_type,
ts.model_name,
tem.epoch,
tem.train_loss,
tem.val_loss,
tem.val_accuracy,
tem.epoch_time_seconds,
tem.gpu_memory_peak_gb,
tem.time
FROM training_epoch_metrics tem
JOIN training_sessions ts ON tem.session_id = ts.session_id
WHERE tem.time >= NOW() - INTERVAL '24 hours'
ORDER BY tem.time DESC;
-- Grant permissions
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO morphguard;
GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO morphguard;