Spaces:
Paused
Paused
File size: 19,039 Bytes
5a81b95 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 | -- schema-validation.sql
-- Database Schema Validation and Statistics Report
-- Citizen Intelligence Agency - Open Source Intelligence Platform
-- Generated for PostgreSQL 16
--
-- Purpose: Validates database schema by counting objects, extracting samples,
-- and generating statistics reports for documentation validation
--
-- NOTE: For comprehensive validation of ALL 177 objects (93 tables + 56 views + 28 mviews),
-- use schema-validation-v2.sql which provides 100% coverage validation.
-- This script focuses on statistics and sampling.
--
-- Usage:
-- psql -U postgres -d cia_dev -f schema-validation.sql > schema_report.txt 2>&1
--
-- Output formats available:
-- 1. Text report (stdout)
-- 2. JSON report (see JSON output section)
-- 3. CSV inventory (see CSV output section)
--
-- Related: Issue Hack23/cia#7865 - Schema validation script foundation
-- See also: schema-validation-v2.sql (Issue Hack23/cia#7872 - 100% coverage)
\set ECHO queries
\timing on
-- ===========================================================================
-- PART 1: DATABASE OBJECT COUNTS
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== DATABASE OBJECT COUNTS ==='
\echo '=========================================='
\echo ''
-- Count base tables
SELECT 'Base Tables' AS object_type, COUNT(*) AS count
FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE';
-- Count regular views
SELECT 'Regular Views' AS object_type, COUNT(*) AS count
FROM information_schema.views
WHERE table_schema = 'public';
-- Count materialized views
SELECT 'Materialized Views' AS object_type, COUNT(*) AS count
FROM pg_matviews
WHERE schemaname = 'public';
-- Count indexes
SELECT 'Indexes' AS object_type, COUNT(*) AS count
FROM pg_indexes
WHERE schemaname = 'public';
-- Count sequences
SELECT 'Sequences' AS object_type, COUNT(*) AS count
FROM information_schema.sequences
WHERE sequence_schema = 'public';
-- Count functions
SELECT 'Functions' AS object_type, COUNT(*) AS count
FROM pg_proc p
JOIN pg_namespace n ON p.pronamespace = n.oid
WHERE n.nspname = 'public';
-- Summary
\echo ''
\echo 'Summary of database objects:'
SELECT
(SELECT COUNT(*) FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE') AS base_tables,
(SELECT COUNT(*) FROM information_schema.views
WHERE table_schema = 'public') AS regular_views,
(SELECT COUNT(*) FROM pg_matviews
WHERE schemaname = 'public') AS materialized_views,
(SELECT COUNT(*) FROM pg_indexes
WHERE schemaname = 'public') AS indexes,
(SELECT COUNT(*) FROM information_schema.sequences
WHERE sequence_schema = 'public') AS sequences,
(SELECT COUNT(*) FROM pg_proc p JOIN pg_namespace n ON p.pronamespace = n.oid
WHERE n.nspname = 'public') AS functions;
-- ===========================================================================
-- PART 2: TABLE INVENTORY WITH ROW COUNTS AND METADATA
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== TABLE INVENTORY ==='
\echo '=========================================='
\echo ''
DO $$
DECLARE
r RECORD;
row_count BIGINT;
col_count INTEGER;
table_size TEXT;
BEGIN
RAISE NOTICE 'TABLE INVENTORY WITH ROW COUNTS, COLUMNS, AND SIZE';
RAISE NOTICE '-----------------------------------------------------';
RAISE NOTICE 'Format: Schema.Table | Rows | Columns | Size';
RAISE NOTICE '';
FOR r IN
SELECT schemaname, tablename
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY tablename
LOOP
BEGIN
-- Get row count
EXECUTE format('SELECT COUNT(*) FROM %I.%I', r.schemaname, r.tablename) INTO row_count;
-- Get column count
SELECT COUNT(*) INTO col_count
FROM information_schema.columns
WHERE table_schema = r.schemaname AND table_name = r.tablename;
-- Get table size
EXECUTE format('SELECT pg_size_pretty(pg_total_relation_size(%L))',
r.schemaname || '.' || r.tablename) INTO table_size;
RAISE NOTICE 'Table: %.% | Rows: % | Columns: % | Size: %',
r.schemaname, r.tablename, row_count, col_count, table_size;
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'Table: %.% | ERROR: %', r.schemaname, r.tablename, SQLERRM;
END;
END LOOP;
END $$;
-- ===========================================================================
-- PART 3: VIEW INVENTORY WITH ROW COUNTS AND METADATA
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== VIEW INVENTORY ==='
\echo '=========================================='
\echo ''
DO $$
DECLARE
r RECORD;
row_count BIGINT;
col_count INTEGER;
BEGIN
RAISE NOTICE 'REGULAR VIEWS INVENTORY';
RAISE NOTICE '-----------------------';
RAISE NOTICE 'Format: Schema.View | Rows | Columns';
RAISE NOTICE '';
-- Regular views
FOR r IN
SELECT schemaname, viewname AS name
FROM pg_views
WHERE schemaname = 'public'
ORDER BY viewname
LOOP
BEGIN
-- Get row count
EXECUTE format('SELECT COUNT(*) FROM %I.%I', r.schemaname, r.name) INTO row_count;
-- Get column count
SELECT COUNT(*) INTO col_count
FROM information_schema.columns
WHERE table_schema = r.schemaname AND table_name = r.name;
RAISE NOTICE 'View: %.% | Rows: % | Columns: %',
r.schemaname, r.name, row_count, col_count;
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'View: %.% | ERROR: %', r.schemaname, r.name, SQLERRM;
END;
END LOOP;
RAISE NOTICE '';
RAISE NOTICE 'MATERIALIZED VIEWS INVENTORY';
RAISE NOTICE '----------------------------';
RAISE NOTICE 'Format: Schema.View | Rows | Columns | Size';
RAISE NOTICE '';
-- Materialized views
FOR r IN
SELECT schemaname, matviewname AS name
FROM pg_matviews
WHERE schemaname = 'public'
ORDER BY matviewname
LOOP
DECLARE
view_size TEXT;
BEGIN
-- Get row count
EXECUTE format('SELECT COUNT(*) FROM %I.%I', r.schemaname, r.name) INTO row_count;
-- Get column count
SELECT COUNT(*) INTO col_count
FROM information_schema.columns
WHERE table_schema = r.schemaname AND table_name = r.name;
-- Get materialized view size
EXECUTE format('SELECT pg_size_pretty(pg_total_relation_size(%L))',
r.schemaname || '.' || r.name) INTO view_size;
RAISE NOTICE 'Materialized View: %.% | Rows: % | Columns: % | Size: %',
r.schemaname, r.name, row_count, col_count, view_size;
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'Materialized View: %.% | ERROR: %', r.schemaname, r.name, SQLERRM;
END;
END LOOP;
END $$;
-- ===========================================================================
-- PART 4: SAMPLE DATA FROM KEY VIEWS
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== SAMPLE DATA FROM KEY VIEWS ==='
\echo '=========================================='
\echo ''
-- Sample from view_riksdagen_politician (if exists)
\echo '--- Sample from view_riksdagen_politician (5 rows) ---'
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.views WHERE table_name = 'view_riksdagen_politician') THEN
RAISE NOTICE 'Executing: SELECT * FROM view_riksdagen_politician LIMIT 5';
ELSE
RAISE NOTICE 'View view_riksdagen_politician does not exist';
END IF;
END $$;
SELECT * FROM view_riksdagen_politician LIMIT 5;
\echo ''
\echo '--- Sample from view_riksdagen_party (5 rows) ---'
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM information_schema.views WHERE table_name = 'view_riksdagen_party') THEN
RAISE NOTICE 'Executing: SELECT * FROM view_riksdagen_party LIMIT 5';
ELSE
RAISE NOTICE 'View view_riksdagen_party does not exist';
END IF;
END $$;
SELECT * FROM view_riksdagen_party LIMIT 5;
\echo ''
\echo '--- Sample from view_riksdagen_vote_data_ballot_politician_summary_daily (recent 5 rows) ---'
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_matviews WHERE matviewname = 'view_riksdagen_vote_data_ballot_politician_summary_daily') THEN
RAISE NOTICE 'Executing: SELECT * FROM view_riksdagen_vote_data_ballot_politician_summary_daily ORDER BY vote_date DESC LIMIT 5';
ELSE
RAISE NOTICE 'Materialized view view_riksdagen_vote_data_ballot_politician_summary_daily does not exist';
END IF;
END $$;
-- Only select if table has vote_date column
DO $$
DECLARE
has_column BOOLEAN;
BEGIN
SELECT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'view_riksdagen_vote_data_ballot_politician_summary_daily'
AND column_name = 'vote_date'
) INTO has_column;
IF has_column THEN
EXECUTE format('SELECT * FROM %I ORDER BY vote_date DESC LIMIT 5', 'view_riksdagen_vote_data_ballot_politician_summary_daily');
ELSE
EXECUTE format('SELECT * FROM %I LIMIT 5', 'view_riksdagen_vote_data_ballot_politician_summary_daily');
END IF;
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'Could not query view: %', SQLERRM;
END $$;
-- ===========================================================================
-- PART 5: SCHEMA STATISTICS
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== SCHEMA STATISTICS ==='
\echo '=========================================='
\echo ''
-- Largest tables by row count
\echo '--- Top 20 Largest Tables by Row Count ---'
SELECT
schemaname,
tablename,
n_live_tup AS row_count,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size,
pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) AS indexes_size
FROM pg_stat_user_tables
WHERE schemaname = 'public'
ORDER BY n_live_tup DESC
LIMIT 20;
\echo ''
\echo '--- Top 20 Largest Tables by Total Size ---'
SELECT
schemaname,
tablename,
n_live_tup AS row_count,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size
FROM pg_stat_user_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
LIMIT 20;
\echo ''
\echo '--- Tables with No Data ---'
SELECT
t.tablename,
COALESCE(s.n_live_tup, 0) AS row_count
FROM pg_tables t
LEFT JOIN pg_stat_user_tables s ON t.tablename = s.relname AND t.schemaname = s.schemaname
WHERE t.schemaname = 'public'
AND (s.n_live_tup = 0 OR s.n_live_tup IS NULL)
ORDER BY t.tablename;
\echo ''
\echo '--- Top 20 Views with Most Dependencies ---'
SELECT
dependent_view.relname AS view_name,
dependent_view.relkind AS view_type,
COUNT(DISTINCT source_table.relname) AS dependency_count
FROM pg_depend
JOIN pg_rewrite ON pg_depend.objid = pg_rewrite.oid
JOIN pg_class AS dependent_view ON pg_rewrite.ev_class = dependent_view.oid
JOIN pg_class AS source_table ON pg_depend.refobjid = source_table.oid
WHERE dependent_view.relkind IN ('v', 'm')
AND dependent_view.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'public')
GROUP BY dependent_view.relname, dependent_view.relkind
ORDER BY dependency_count DESC
LIMIT 20;
\echo ''
\echo '--- Database Size Summary ---'
SELECT
pg_size_pretty(pg_database_size(current_database())) AS database_size,
(SELECT COUNT(*) FROM pg_stat_user_tables WHERE schemaname = 'public') AS table_count,
(SELECT COUNT(*) FROM pg_views WHERE schemaname = 'public') AS view_count,
(SELECT COUNT(*) FROM pg_matviews WHERE schemaname = 'public') AS materialized_view_count,
(SELECT SUM(n_live_tup) FROM pg_stat_user_tables WHERE schemaname = 'public') AS total_rows;
-- ===========================================================================
-- PART 6: COLUMN STATISTICS
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== COLUMN STATISTICS ==='
\echo '=========================================='
\echo ''
\echo '--- Tables with Most Columns ---'
SELECT
table_name,
COUNT(*) AS column_count
FROM information_schema.columns
WHERE table_schema = 'public'
GROUP BY table_name
ORDER BY column_count DESC
LIMIT 20;
\echo ''
\echo '--- Most Common Column Names ---'
SELECT
column_name,
COUNT(*) AS usage_count
FROM information_schema.columns
WHERE table_schema = 'public'
GROUP BY column_name
ORDER BY usage_count DESC
LIMIT 20;
\echo ''
\echo '--- Most Common Data Types ---'
SELECT
data_type,
COUNT(*) AS usage_count
FROM information_schema.columns
WHERE table_schema = 'public'
GROUP BY data_type
ORDER BY usage_count DESC;
-- ===========================================================================
-- PART 7: INDEX STATISTICS
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== INDEX STATISTICS ==='
\echo '=========================================='
\echo ''
\echo '--- Index Usage Statistics (Top 20 by Size) ---'
SELECT
schemaname,
tablename,
indexname,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
idx_scan AS number_of_scans,
idx_tup_read AS tuples_read,
idx_tup_fetch AS tuples_fetched
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
ORDER BY pg_relation_size(indexrelid) DESC
LIMIT 20;
\echo ''
\echo '--- Unused Indexes (No Scans) ---'
SELECT
schemaname,
tablename,
indexname,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE schemaname = 'public'
AND idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;
-- ===========================================================================
-- PART 8: JSON OUTPUT (Optional)
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== JSON OUTPUT AVAILABLE ==='
\echo '=========================================='
\echo ''
\echo 'To generate JSON report, run:'
\echo 'psql -U postgres -d cia_dev -t -A -F"," -c "SELECT json_build_object(...)" > schema_report.json'
\echo ''
-- Commented out: Uncomment and modify path as needed
/*
COPY (
SELECT json_build_object(
'generated_at', NOW(),
'database', current_database(),
'summary', json_build_object(
'base_tables', (SELECT COUNT(*) FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'),
'regular_views', (SELECT COUNT(*) FROM information_schema.views
WHERE table_schema = 'public'),
'materialized_views', (SELECT COUNT(*) FROM pg_matviews
WHERE schemaname = 'public'),
'indexes', (SELECT COUNT(*) FROM pg_indexes WHERE schemaname = 'public'),
'total_rows', (SELECT SUM(n_live_tup) FROM pg_stat_user_tables WHERE schemaname = 'public')
),
'tables', (
SELECT json_agg(json_build_object(
'name', tablename,
'rows', n_live_tup,
'size', pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename))
))
FROM pg_stat_user_tables
WHERE schemaname = 'public'
ORDER BY tablename
),
'views', (
SELECT json_agg(json_build_object(
'name', viewname,
'type', 'regular'
))
FROM pg_views
WHERE schemaname = 'public'
ORDER BY viewname
),
'materialized_views', (
SELECT json_agg(json_build_object(
'name', matviewname,
'type', 'materialized'
))
FROM pg_matviews
WHERE schemaname = 'public'
ORDER BY matviewname
)
)::text
) TO '/path/to/schema_report.json';
*/
-- ===========================================================================
-- PART 9: CSV OUTPUT (Optional)
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== CSV OUTPUT AVAILABLE ==='
\echo '=========================================='
\echo ''
\echo 'To generate CSV inventory, run:'
\echo 'psql -U postgres -d cia_dev -c "COPY (...) TO STDOUT WITH CSV HEADER" > schema_inventory.csv'
\echo ''
-- Commented out: Uncomment and modify path as needed
/*
COPY (
SELECT
'TABLE' AS object_type,
tablename AS object_name,
n_live_tup AS row_count,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size,
(SELECT COUNT(*) FROM information_schema.columns
WHERE table_schema = schemaname AND table_name = tablename) AS column_count
FROM pg_stat_user_tables
WHERE schemaname = 'public'
UNION ALL
SELECT
'VIEW' AS object_type,
viewname AS object_name,
NULL AS row_count,
NULL AS total_size,
(SELECT COUNT(*) FROM information_schema.columns
WHERE table_schema = schemaname AND table_name = viewname) AS column_count
FROM pg_views
WHERE schemaname = 'public'
UNION ALL
SELECT
'MATERIALIZED_VIEW' AS object_type,
matviewname AS object_name,
NULL AS row_count,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||matviewname)) AS total_size,
(SELECT COUNT(*) FROM information_schema.columns
WHERE table_schema = schemaname AND table_name = matviewname) AS column_count
FROM pg_matviews
WHERE schemaname = 'public'
ORDER BY object_type, object_name
) TO '/path/to/schema_inventory.csv' WITH (FORMAT CSV, HEADER);
*/
-- ===========================================================================
-- VALIDATION COMPLETE
-- ===========================================================================
\echo ''
\echo '=========================================='
\echo '=== VALIDATION COMPLETE ==='
\echo '=========================================='
\echo ''
\echo 'Schema validation report generated successfully.'
\echo 'Review the output above for schema health and statistics.'
\echo ''
\timing off
|