Spaces:
Paused
Paused
| -- schema-validation.sql | |
| -- Database Schema Validation and Statistics Report | |
| -- Citizen Intelligence Agency - Open Source Intelligence Platform | |
| -- Generated for PostgreSQL 16 | |
| -- | |
| -- Purpose: Validates database schema by counting objects, extracting samples, | |
| -- and generating statistics reports for documentation validation | |
| -- | |
| -- NOTE: For comprehensive validation of ALL 177 objects (93 tables + 56 views + 28 mviews), | |
| -- use schema-validation-v2.sql which provides 100% coverage validation. | |
| -- This script focuses on statistics and sampling. | |
| -- | |
| -- Usage: | |
| -- psql -U postgres -d cia_dev -f schema-validation.sql > schema_report.txt 2>&1 | |
| -- | |
| -- Output formats available: | |
| -- 1. Text report (stdout) | |
| -- 2. JSON report (see JSON output section) | |
| -- 3. CSV inventory (see CSV output section) | |
| -- | |
| -- Related: Issue Hack23/cia#7865 - Schema validation script foundation | |
| -- See also: schema-validation-v2.sql (Issue Hack23/cia#7872 - 100% coverage) | |
| \set ECHO queries | |
| \timing on | |
| -- =========================================================================== | |
| -- PART 1: DATABASE OBJECT COUNTS | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== DATABASE OBJECT COUNTS ===' | |
| \echo '==========================================' | |
| \echo '' | |
| -- Count base tables | |
| SELECT 'Base Tables' AS object_type, COUNT(*) AS count | |
| FROM information_schema.tables | |
| WHERE table_schema = 'public' AND table_type = 'BASE TABLE'; | |
| -- Count regular views | |
| SELECT 'Regular Views' AS object_type, COUNT(*) AS count | |
| FROM information_schema.views | |
| WHERE table_schema = 'public'; | |
| -- Count materialized views | |
| SELECT 'Materialized Views' AS object_type, COUNT(*) AS count | |
| FROM pg_matviews | |
| WHERE schemaname = 'public'; | |
| -- Count indexes | |
| SELECT 'Indexes' AS object_type, COUNT(*) AS count | |
| FROM pg_indexes | |
| WHERE schemaname = 'public'; | |
| -- Count sequences | |
| SELECT 'Sequences' AS object_type, COUNT(*) AS count | |
| FROM information_schema.sequences | |
| WHERE sequence_schema = 'public'; | |
| -- Count functions | |
| SELECT 'Functions' AS object_type, COUNT(*) AS count | |
| FROM pg_proc p | |
| JOIN pg_namespace n ON p.pronamespace = n.oid | |
| WHERE n.nspname = 'public'; | |
| -- Summary | |
| \echo '' | |
| \echo 'Summary of database objects:' | |
| SELECT | |
| (SELECT COUNT(*) FROM information_schema.tables | |
| WHERE table_schema = 'public' AND table_type = 'BASE TABLE') AS base_tables, | |
| (SELECT COUNT(*) FROM information_schema.views | |
| WHERE table_schema = 'public') AS regular_views, | |
| (SELECT COUNT(*) FROM pg_matviews | |
| WHERE schemaname = 'public') AS materialized_views, | |
| (SELECT COUNT(*) FROM pg_indexes | |
| WHERE schemaname = 'public') AS indexes, | |
| (SELECT COUNT(*) FROM information_schema.sequences | |
| WHERE sequence_schema = 'public') AS sequences, | |
| (SELECT COUNT(*) FROM pg_proc p JOIN pg_namespace n ON p.pronamespace = n.oid | |
| WHERE n.nspname = 'public') AS functions; | |
| -- =========================================================================== | |
| -- PART 2: TABLE INVENTORY WITH ROW COUNTS AND METADATA | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== TABLE INVENTORY ===' | |
| \echo '==========================================' | |
| \echo '' | |
| DO $$ | |
| DECLARE | |
| r RECORD; | |
| row_count BIGINT; | |
| col_count INTEGER; | |
| table_size TEXT; | |
| BEGIN | |
| RAISE NOTICE 'TABLE INVENTORY WITH ROW COUNTS, COLUMNS, AND SIZE'; | |
| RAISE NOTICE '-----------------------------------------------------'; | |
| RAISE NOTICE 'Format: Schema.Table | Rows | Columns | Size'; | |
| RAISE NOTICE ''; | |
| FOR r IN | |
| SELECT schemaname, tablename | |
| FROM pg_tables | |
| WHERE schemaname = 'public' | |
| ORDER BY tablename | |
| LOOP | |
| BEGIN | |
| -- Get row count | |
| EXECUTE format('SELECT COUNT(*) FROM %I.%I', r.schemaname, r.tablename) INTO row_count; | |
| -- Get column count | |
| SELECT COUNT(*) INTO col_count | |
| FROM information_schema.columns | |
| WHERE table_schema = r.schemaname AND table_name = r.tablename; | |
| -- Get table size | |
| EXECUTE format('SELECT pg_size_pretty(pg_total_relation_size(%L))', | |
| r.schemaname || '.' || r.tablename) INTO table_size; | |
| RAISE NOTICE 'Table: %.% | Rows: % | Columns: % | Size: %', | |
| r.schemaname, r.tablename, row_count, col_count, table_size; | |
| EXCEPTION WHEN OTHERS THEN | |
| RAISE NOTICE 'Table: %.% | ERROR: %', r.schemaname, r.tablename, SQLERRM; | |
| END; | |
| END LOOP; | |
| END $$; | |
| -- =========================================================================== | |
| -- PART 3: VIEW INVENTORY WITH ROW COUNTS AND METADATA | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== VIEW INVENTORY ===' | |
| \echo '==========================================' | |
| \echo '' | |
| DO $$ | |
| DECLARE | |
| r RECORD; | |
| row_count BIGINT; | |
| col_count INTEGER; | |
| BEGIN | |
| RAISE NOTICE 'REGULAR VIEWS INVENTORY'; | |
| RAISE NOTICE '-----------------------'; | |
| RAISE NOTICE 'Format: Schema.View | Rows | Columns'; | |
| RAISE NOTICE ''; | |
| -- Regular views | |
| FOR r IN | |
| SELECT schemaname, viewname AS name | |
| FROM pg_views | |
| WHERE schemaname = 'public' | |
| ORDER BY viewname | |
| LOOP | |
| BEGIN | |
| -- Get row count | |
| EXECUTE format('SELECT COUNT(*) FROM %I.%I', r.schemaname, r.name) INTO row_count; | |
| -- Get column count | |
| SELECT COUNT(*) INTO col_count | |
| FROM information_schema.columns | |
| WHERE table_schema = r.schemaname AND table_name = r.name; | |
| RAISE NOTICE 'View: %.% | Rows: % | Columns: %', | |
| r.schemaname, r.name, row_count, col_count; | |
| EXCEPTION WHEN OTHERS THEN | |
| RAISE NOTICE 'View: %.% | ERROR: %', r.schemaname, r.name, SQLERRM; | |
| END; | |
| END LOOP; | |
| RAISE NOTICE ''; | |
| RAISE NOTICE 'MATERIALIZED VIEWS INVENTORY'; | |
| RAISE NOTICE '----------------------------'; | |
| RAISE NOTICE 'Format: Schema.View | Rows | Columns | Size'; | |
| RAISE NOTICE ''; | |
| -- Materialized views | |
| FOR r IN | |
| SELECT schemaname, matviewname AS name | |
| FROM pg_matviews | |
| WHERE schemaname = 'public' | |
| ORDER BY matviewname | |
| LOOP | |
| DECLARE | |
| view_size TEXT; | |
| BEGIN | |
| -- Get row count | |
| EXECUTE format('SELECT COUNT(*) FROM %I.%I', r.schemaname, r.name) INTO row_count; | |
| -- Get column count | |
| SELECT COUNT(*) INTO col_count | |
| FROM information_schema.columns | |
| WHERE table_schema = r.schemaname AND table_name = r.name; | |
| -- Get materialized view size | |
| EXECUTE format('SELECT pg_size_pretty(pg_total_relation_size(%L))', | |
| r.schemaname || '.' || r.name) INTO view_size; | |
| RAISE NOTICE 'Materialized View: %.% | Rows: % | Columns: % | Size: %', | |
| r.schemaname, r.name, row_count, col_count, view_size; | |
| EXCEPTION WHEN OTHERS THEN | |
| RAISE NOTICE 'Materialized View: %.% | ERROR: %', r.schemaname, r.name, SQLERRM; | |
| END; | |
| END LOOP; | |
| END $$; | |
| -- =========================================================================== | |
| -- PART 4: SAMPLE DATA FROM KEY VIEWS | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== SAMPLE DATA FROM KEY VIEWS ===' | |
| \echo '==========================================' | |
| \echo '' | |
| -- Sample from view_riksdagen_politician (if exists) | |
| \echo '--- Sample from view_riksdagen_politician (5 rows) ---' | |
| DO $$ | |
| BEGIN | |
| IF EXISTS (SELECT 1 FROM information_schema.views WHERE table_name = 'view_riksdagen_politician') THEN | |
| RAISE NOTICE 'Executing: SELECT * FROM view_riksdagen_politician LIMIT 5'; | |
| ELSE | |
| RAISE NOTICE 'View view_riksdagen_politician does not exist'; | |
| END IF; | |
| END $$; | |
| SELECT * FROM view_riksdagen_politician LIMIT 5; | |
| \echo '' | |
| \echo '--- Sample from view_riksdagen_party (5 rows) ---' | |
| DO $$ | |
| BEGIN | |
| IF EXISTS (SELECT 1 FROM information_schema.views WHERE table_name = 'view_riksdagen_party') THEN | |
| RAISE NOTICE 'Executing: SELECT * FROM view_riksdagen_party LIMIT 5'; | |
| ELSE | |
| RAISE NOTICE 'View view_riksdagen_party does not exist'; | |
| END IF; | |
| END $$; | |
| SELECT * FROM view_riksdagen_party LIMIT 5; | |
| \echo '' | |
| \echo '--- Sample from view_riksdagen_vote_data_ballot_politician_summary_daily (recent 5 rows) ---' | |
| DO $$ | |
| BEGIN | |
| IF EXISTS (SELECT 1 FROM pg_matviews WHERE matviewname = 'view_riksdagen_vote_data_ballot_politician_summary_daily') THEN | |
| RAISE NOTICE 'Executing: SELECT * FROM view_riksdagen_vote_data_ballot_politician_summary_daily ORDER BY vote_date DESC LIMIT 5'; | |
| ELSE | |
| RAISE NOTICE 'Materialized view view_riksdagen_vote_data_ballot_politician_summary_daily does not exist'; | |
| END IF; | |
| END $$; | |
| -- Only select if table has vote_date column | |
| DO $$ | |
| DECLARE | |
| has_column BOOLEAN; | |
| BEGIN | |
| SELECT EXISTS ( | |
| SELECT 1 FROM information_schema.columns | |
| WHERE table_name = 'view_riksdagen_vote_data_ballot_politician_summary_daily' | |
| AND column_name = 'vote_date' | |
| ) INTO has_column; | |
| IF has_column THEN | |
| EXECUTE format('SELECT * FROM %I ORDER BY vote_date DESC LIMIT 5', 'view_riksdagen_vote_data_ballot_politician_summary_daily'); | |
| ELSE | |
| EXECUTE format('SELECT * FROM %I LIMIT 5', 'view_riksdagen_vote_data_ballot_politician_summary_daily'); | |
| END IF; | |
| EXCEPTION WHEN OTHERS THEN | |
| RAISE NOTICE 'Could not query view: %', SQLERRM; | |
| END $$; | |
| -- =========================================================================== | |
| -- PART 5: SCHEMA STATISTICS | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== SCHEMA STATISTICS ===' | |
| \echo '==========================================' | |
| \echo '' | |
| -- Largest tables by row count | |
| \echo '--- Top 20 Largest Tables by Row Count ---' | |
| SELECT | |
| schemaname, | |
| tablename, | |
| n_live_tup AS row_count, | |
| pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size, | |
| pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size, | |
| pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename) - pg_relation_size(schemaname||'.'||tablename)) AS indexes_size | |
| FROM pg_stat_user_tables | |
| WHERE schemaname = 'public' | |
| ORDER BY n_live_tup DESC | |
| LIMIT 20; | |
| \echo '' | |
| \echo '--- Top 20 Largest Tables by Total Size ---' | |
| SELECT | |
| schemaname, | |
| tablename, | |
| n_live_tup AS row_count, | |
| pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size | |
| FROM pg_stat_user_tables | |
| WHERE schemaname = 'public' | |
| ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC | |
| LIMIT 20; | |
| \echo '' | |
| \echo '--- Tables with No Data ---' | |
| SELECT | |
| t.tablename, | |
| COALESCE(s.n_live_tup, 0) AS row_count | |
| FROM pg_tables t | |
| LEFT JOIN pg_stat_user_tables s ON t.tablename = s.relname AND t.schemaname = s.schemaname | |
| WHERE t.schemaname = 'public' | |
| AND (s.n_live_tup = 0 OR s.n_live_tup IS NULL) | |
| ORDER BY t.tablename; | |
| \echo '' | |
| \echo '--- Top 20 Views with Most Dependencies ---' | |
| SELECT | |
| dependent_view.relname AS view_name, | |
| dependent_view.relkind AS view_type, | |
| COUNT(DISTINCT source_table.relname) AS dependency_count | |
| FROM pg_depend | |
| JOIN pg_rewrite ON pg_depend.objid = pg_rewrite.oid | |
| JOIN pg_class AS dependent_view ON pg_rewrite.ev_class = dependent_view.oid | |
| JOIN pg_class AS source_table ON pg_depend.refobjid = source_table.oid | |
| WHERE dependent_view.relkind IN ('v', 'm') | |
| AND dependent_view.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'public') | |
| GROUP BY dependent_view.relname, dependent_view.relkind | |
| ORDER BY dependency_count DESC | |
| LIMIT 20; | |
| \echo '' | |
| \echo '--- Database Size Summary ---' | |
| SELECT | |
| pg_size_pretty(pg_database_size(current_database())) AS database_size, | |
| (SELECT COUNT(*) FROM pg_stat_user_tables WHERE schemaname = 'public') AS table_count, | |
| (SELECT COUNT(*) FROM pg_views WHERE schemaname = 'public') AS view_count, | |
| (SELECT COUNT(*) FROM pg_matviews WHERE schemaname = 'public') AS materialized_view_count, | |
| (SELECT SUM(n_live_tup) FROM pg_stat_user_tables WHERE schemaname = 'public') AS total_rows; | |
| -- =========================================================================== | |
| -- PART 6: COLUMN STATISTICS | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== COLUMN STATISTICS ===' | |
| \echo '==========================================' | |
| \echo '' | |
| \echo '--- Tables with Most Columns ---' | |
| SELECT | |
| table_name, | |
| COUNT(*) AS column_count | |
| FROM information_schema.columns | |
| WHERE table_schema = 'public' | |
| GROUP BY table_name | |
| ORDER BY column_count DESC | |
| LIMIT 20; | |
| \echo '' | |
| \echo '--- Most Common Column Names ---' | |
| SELECT | |
| column_name, | |
| COUNT(*) AS usage_count | |
| FROM information_schema.columns | |
| WHERE table_schema = 'public' | |
| GROUP BY column_name | |
| ORDER BY usage_count DESC | |
| LIMIT 20; | |
| \echo '' | |
| \echo '--- Most Common Data Types ---' | |
| SELECT | |
| data_type, | |
| COUNT(*) AS usage_count | |
| FROM information_schema.columns | |
| WHERE table_schema = 'public' | |
| GROUP BY data_type | |
| ORDER BY usage_count DESC; | |
| -- =========================================================================== | |
| -- PART 7: INDEX STATISTICS | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== INDEX STATISTICS ===' | |
| \echo '==========================================' | |
| \echo '' | |
| \echo '--- Index Usage Statistics (Top 20 by Size) ---' | |
| SELECT | |
| schemaname, | |
| tablename, | |
| indexname, | |
| pg_size_pretty(pg_relation_size(indexrelid)) AS index_size, | |
| idx_scan AS number_of_scans, | |
| idx_tup_read AS tuples_read, | |
| idx_tup_fetch AS tuples_fetched | |
| FROM pg_stat_user_indexes | |
| WHERE schemaname = 'public' | |
| ORDER BY pg_relation_size(indexrelid) DESC | |
| LIMIT 20; | |
| \echo '' | |
| \echo '--- Unused Indexes (No Scans) ---' | |
| SELECT | |
| schemaname, | |
| tablename, | |
| indexname, | |
| pg_size_pretty(pg_relation_size(indexrelid)) AS index_size | |
| FROM pg_stat_user_indexes | |
| WHERE schemaname = 'public' | |
| AND idx_scan = 0 | |
| ORDER BY pg_relation_size(indexrelid) DESC; | |
| -- =========================================================================== | |
| -- PART 8: JSON OUTPUT (Optional) | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== JSON OUTPUT AVAILABLE ===' | |
| \echo '==========================================' | |
| \echo '' | |
| \echo 'To generate JSON report, run:' | |
| \echo 'psql -U postgres -d cia_dev -t -A -F"," -c "SELECT json_build_object(...)" > schema_report.json' | |
| \echo '' | |
| -- Commented out: Uncomment and modify path as needed | |
| /* | |
| COPY ( | |
| SELECT json_build_object( | |
| 'generated_at', NOW(), | |
| 'database', current_database(), | |
| 'summary', json_build_object( | |
| 'base_tables', (SELECT COUNT(*) FROM information_schema.tables | |
| WHERE table_schema = 'public' AND table_type = 'BASE TABLE'), | |
| 'regular_views', (SELECT COUNT(*) FROM information_schema.views | |
| WHERE table_schema = 'public'), | |
| 'materialized_views', (SELECT COUNT(*) FROM pg_matviews | |
| WHERE schemaname = 'public'), | |
| 'indexes', (SELECT COUNT(*) FROM pg_indexes WHERE schemaname = 'public'), | |
| 'total_rows', (SELECT SUM(n_live_tup) FROM pg_stat_user_tables WHERE schemaname = 'public') | |
| ), | |
| 'tables', ( | |
| SELECT json_agg(json_build_object( | |
| 'name', tablename, | |
| 'rows', n_live_tup, | |
| 'size', pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) | |
| )) | |
| FROM pg_stat_user_tables | |
| WHERE schemaname = 'public' | |
| ORDER BY tablename | |
| ), | |
| 'views', ( | |
| SELECT json_agg(json_build_object( | |
| 'name', viewname, | |
| 'type', 'regular' | |
| )) | |
| FROM pg_views | |
| WHERE schemaname = 'public' | |
| ORDER BY viewname | |
| ), | |
| 'materialized_views', ( | |
| SELECT json_agg(json_build_object( | |
| 'name', matviewname, | |
| 'type', 'materialized' | |
| )) | |
| FROM pg_matviews | |
| WHERE schemaname = 'public' | |
| ORDER BY matviewname | |
| ) | |
| )::text | |
| ) TO '/path/to/schema_report.json'; | |
| */ | |
| -- =========================================================================== | |
| -- PART 9: CSV OUTPUT (Optional) | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== CSV OUTPUT AVAILABLE ===' | |
| \echo '==========================================' | |
| \echo '' | |
| \echo 'To generate CSV inventory, run:' | |
| \echo 'psql -U postgres -d cia_dev -c "COPY (...) TO STDOUT WITH CSV HEADER" > schema_inventory.csv' | |
| \echo '' | |
| -- Commented out: Uncomment and modify path as needed | |
| /* | |
| COPY ( | |
| SELECT | |
| 'TABLE' AS object_type, | |
| tablename AS object_name, | |
| n_live_tup AS row_count, | |
| pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size, | |
| (SELECT COUNT(*) FROM information_schema.columns | |
| WHERE table_schema = schemaname AND table_name = tablename) AS column_count | |
| FROM pg_stat_user_tables | |
| WHERE schemaname = 'public' | |
| UNION ALL | |
| SELECT | |
| 'VIEW' AS object_type, | |
| viewname AS object_name, | |
| NULL AS row_count, | |
| NULL AS total_size, | |
| (SELECT COUNT(*) FROM information_schema.columns | |
| WHERE table_schema = schemaname AND table_name = viewname) AS column_count | |
| FROM pg_views | |
| WHERE schemaname = 'public' | |
| UNION ALL | |
| SELECT | |
| 'MATERIALIZED_VIEW' AS object_type, | |
| matviewname AS object_name, | |
| NULL AS row_count, | |
| pg_size_pretty(pg_total_relation_size(schemaname||'.'||matviewname)) AS total_size, | |
| (SELECT COUNT(*) FROM information_schema.columns | |
| WHERE table_schema = schemaname AND table_name = matviewname) AS column_count | |
| FROM pg_matviews | |
| WHERE schemaname = 'public' | |
| ORDER BY object_type, object_name | |
| ) TO '/path/to/schema_inventory.csv' WITH (FORMAT CSV, HEADER); | |
| */ | |
| -- =========================================================================== | |
| -- VALIDATION COMPLETE | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================' | |
| \echo '=== VALIDATION COMPLETE ===' | |
| \echo '==========================================' | |
| \echo '' | |
| \echo 'Schema validation report generated successfully.' | |
| \echo 'Review the output above for schema health and statistics.' | |
| \echo '' | |
| \timing off | |