Spaces:
Paused
Paused
| -- Enable detailed query analysis | |
| SET track_io_timing = ON; | |
| SET enable_timing = ON; | |
| \echo '==========================================================================' | |
| \echo '=== Coalition Alignment Matrix - Detailed Subquery Analysis ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| \echo 'This analysis breaks down the coalition view into individual CTEs' | |
| \echo 'to identify performance bottlenecks and optimization opportunities.' | |
| \echo '' | |
| -- =========================================================================== | |
| -- SECTION 1: Analyze CTE 1 - party_votes (Base Data Filter) | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== CTE 1: party_votes (5-year vote filter) ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text) | |
| SELECT vote_data.party, | |
| vote_data.embedded_id_ballot_id AS ballot_id, | |
| vote_data.vote, | |
| vote_data.vote_date | |
| FROM public.vote_data | |
| WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval) | |
| AND vote_data.party IS NOT NULL | |
| AND vote_data.vote IS NOT NULL; | |
| \echo '' | |
| \echo '--- CTE 1 Analysis Summary ---' | |
| \echo 'Purpose: Filter vote_data for last 5 years with non-null party/vote' | |
| \echo 'Expected: Should use index on vote_date + party + vote' | |
| \echo 'Watch for: Sequential scans, missing indexes' | |
| \echo '' | |
| -- =========================================================================== | |
| -- SECTION 2: Analyze CTE 2 - party_pairs (Cartesian Product) | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== CTE 2: party_pairs (All party combinations) ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text) | |
| WITH party_votes AS ( | |
| SELECT vote_data.party, | |
| vote_data.embedded_id_ballot_id AS ballot_id, | |
| vote_data.vote, | |
| vote_data.vote_date | |
| FROM public.vote_data | |
| WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval) | |
| AND vote_data.party IS NOT NULL | |
| AND vote_data.vote IS NOT NULL | |
| ) | |
| SELECT DISTINCT p1.party AS party1, p2.party AS party2 | |
| FROM party_votes p1 | |
| CROSS JOIN party_votes p2 | |
| WHERE p1.party < p2.party; | |
| \echo '' | |
| \echo '--- CTE 2 Analysis Summary ---' | |
| \echo 'Purpose: Generate all unique party pairs (lower-triangle matrix)' | |
| \echo 'Expected: DISTINCT on small result set (8-12 parties = 28-66 pairs)' | |
| \echo 'Watch for: Expensive CROSS JOIN materialization' | |
| \echo '' | |
| -- =========================================================================== | |
| -- SECTION 3: Analyze CTE 3 - alignment_metrics (Heavy Aggregation) | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== CTE 3: alignment_metrics (Vote alignment aggregation) ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text) | |
| WITH party_votes AS ( | |
| SELECT vote_data.party, | |
| vote_data.embedded_id_ballot_id AS ballot_id, | |
| vote_data.vote, | |
| vote_data.vote_date | |
| FROM public.vote_data | |
| WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval) | |
| AND vote_data.party IS NOT NULL | |
| AND vote_data.vote IS NOT NULL | |
| ), party_pairs AS ( | |
| SELECT DISTINCT p1.party AS party1, p2.party AS party2 | |
| FROM party_votes p1 | |
| CROSS JOIN party_votes p2 | |
| WHERE p1.party < p2.party | |
| ) | |
| SELECT pp.party1, | |
| pp.party2, | |
| count(DISTINCT pv1.ballot_id) AS total_votes, | |
| count(DISTINCT CASE WHEN pv1.vote = pv2.vote THEN pv1.ballot_id END) AS aligned_votes, | |
| count(DISTINCT CASE WHEN pv1.vote = 'Ja' AND pv2.vote = 'Ja' THEN pv1.ballot_id END) AS both_yes, | |
| count(DISTINCT CASE WHEN pv1.vote = 'Nej' AND pv2.vote = 'Nej' THEN pv1.ballot_id END) AS both_no, | |
| count(DISTINCT CASE WHEN pv1.vote = 'Avstå' OR pv2.vote = 'Avstå' THEN pv1.ballot_id END) AS abstention_count, | |
| min(pv1.vote_date) AS earliest_vote, | |
| max(pv1.vote_date) AS latest_vote | |
| FROM party_pairs pp | |
| JOIN party_votes pv1 ON pv1.party = pp.party1 | |
| JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id | |
| GROUP BY pp.party1, pp.party2; | |
| \echo '' | |
| \echo '--- CTE 3 Analysis Summary ---' | |
| \echo 'Purpose: Calculate alignment metrics for each party pair' | |
| \echo 'Expected: Hash joins on party, nested loop on ballot_id' | |
| \echo 'Watch for: Multiple COUNT(DISTINCT), hash aggregation spills' | |
| \echo '' | |
| -- =========================================================================== | |
| -- SECTION 4: Analyze JOIN Strategy Between CTEs | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== JOIN Analysis: party_pairs → party_votes (pv1) ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text) | |
| WITH party_votes AS ( | |
| SELECT vote_data.party, | |
| vote_data.embedded_id_ballot_id AS ballot_id, | |
| vote_data.vote, | |
| vote_data.vote_date | |
| FROM public.vote_data | |
| WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval) | |
| AND vote_data.party IS NOT NULL | |
| AND vote_data.vote IS NOT NULL | |
| ), party_pairs AS ( | |
| SELECT DISTINCT p1.party AS party1, p2.party AS party2 | |
| FROM party_votes p1 CROSS JOIN party_votes p2 | |
| WHERE p1.party < p2.party | |
| ) | |
| SELECT pp.party1, pp.party2, COUNT(*) AS pv1_matches | |
| FROM party_pairs pp | |
| JOIN party_votes pv1 ON pv1.party = pp.party1 | |
| GROUP BY pp.party1, pp.party2 | |
| LIMIT 10; | |
| \echo '' | |
| \echo '--- First JOIN Analysis ---' | |
| \echo 'Join type: party_pairs → party_votes (filtering by party1)' | |
| \echo 'Expected: Hash join or nested loop' | |
| \echo '' | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== JOIN Analysis: pv1 → party_votes (pv2) on ballot_id ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text) | |
| WITH party_votes AS ( | |
| SELECT vote_data.party, | |
| vote_data.embedded_id_ballot_id AS ballot_id, | |
| vote_data.vote, | |
| vote_data.vote_date | |
| FROM public.vote_data | |
| WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval) | |
| AND vote_data.party IS NOT NULL | |
| AND vote_data.vote IS NOT NULL | |
| ), party_pairs AS ( | |
| SELECT DISTINCT p1.party AS party1, p2.party AS party2 | |
| FROM party_votes p1 CROSS JOIN party_votes p2 | |
| WHERE p1.party < p2.party | |
| ) | |
| SELECT pp.party1, pp.party2, COUNT(*) AS matching_ballots | |
| FROM party_pairs pp | |
| JOIN party_votes pv1 ON pv1.party = pp.party1 | |
| JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id | |
| GROUP BY pp.party1, pp.party2 | |
| LIMIT 10; | |
| \echo '' | |
| \echo '--- Second JOIN Analysis ---' | |
| \echo 'Join type: pv1 → party_votes (pv2) matching ballot_id' | |
| \echo 'Expected: Hash join on ballot_id with party filter' | |
| \echo 'Watch for: Nested loop if index available' | |
| \echo '' | |
| -- =========================================================================== | |
| -- SECTION 5: Analyze COUNT(DISTINCT) Performance | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== Aggregation Analysis: COUNT(DISTINCT ballot_id) Performance ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, FORMAT text) | |
| WITH party_votes AS ( | |
| SELECT vote_data.party, | |
| vote_data.embedded_id_ballot_id AS ballot_id, | |
| vote_data.vote, | |
| vote_data.vote_date | |
| FROM public.vote_data | |
| WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval) | |
| AND vote_data.party IS NOT NULL | |
| AND vote_data.vote IS NOT NULL | |
| ) | |
| SELECT | |
| COUNT(DISTINCT ballot_id) AS total_ballots, | |
| COUNT(DISTINCT CASE WHEN party = 'S' THEN ballot_id END) AS s_ballots, | |
| COUNT(DISTINCT CASE WHEN party = 'M' THEN ballot_id END) AS m_ballots | |
| FROM party_votes; | |
| \echo '' | |
| \echo '--- COUNT(DISTINCT) Analysis ---' | |
| \echo 'Purpose: Measure distinct ballot counting overhead' | |
| \echo 'Watch for: Hash aggregate vs. sort aggregate' | |
| \echo '' | |
| -- =========================================================================== | |
| -- SECTION 6: Full Query Analysis | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== Full Query: Complete Coalition Alignment Matrix ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| EXPLAIN (ANALYZE true, VERBOSE true, BUFFERS true, TIMING true, SUMMARY true, FORMAT text) | |
| WITH party_votes AS ( | |
| SELECT vote_data.party, | |
| vote_data.embedded_id_ballot_id AS ballot_id, | |
| vote_data.vote, | |
| vote_data.vote_date | |
| FROM public.vote_data | |
| WHERE vote_data.vote_date >= (CURRENT_DATE - '5 years'::interval) | |
| AND vote_data.party IS NOT NULL | |
| AND vote_data.vote IS NOT NULL | |
| ), party_pairs AS ( | |
| SELECT DISTINCT p1.party AS party1, p2.party AS party2 | |
| FROM party_votes p1 CROSS JOIN party_votes p2 | |
| WHERE p1.party < p2.party | |
| ), alignment_metrics AS ( | |
| SELECT pp.party1, | |
| pp.party2, | |
| count(DISTINCT pv1.ballot_id) AS total_votes, | |
| count(DISTINCT CASE WHEN pv1.vote = pv2.vote THEN pv1.ballot_id END) AS aligned_votes, | |
| count(DISTINCT CASE WHEN pv1.vote = 'Ja' AND pv2.vote = 'Ja' THEN pv1.ballot_id END) AS both_yes, | |
| count(DISTINCT CASE WHEN pv1.vote = 'Nej' AND pv2.vote = 'Nej' THEN pv1.ballot_id END) AS both_no, | |
| count(DISTINCT CASE WHEN pv1.vote = 'Avstå' OR pv2.vote = 'Avstå' THEN pv1.ballot_id END) AS abstention_count, | |
| min(pv1.vote_date) AS earliest_vote, | |
| max(pv1.vote_date) AS latest_vote | |
| FROM party_pairs pp | |
| JOIN party_votes pv1 ON pv1.party = pp.party1 | |
| JOIN party_votes pv2 ON pv2.party = pp.party2 AND pv2.ballot_id = pv1.ballot_id | |
| GROUP BY pp.party1, pp.party2 | |
| ) | |
| SELECT party1, party2, total_votes, aligned_votes, both_yes, both_no, | |
| abstention_count, earliest_vote, latest_vote, | |
| round((aligned_votes::numeric / NULLIF(total_votes, 0) * 100), 2) AS alignment_percentage, | |
| round((both_yes::numeric / NULLIF(total_votes, 0) * 100), 2) AS joint_support_rate, | |
| round((both_no::numeric / NULLIF(total_votes, 0) * 100), 2) AS joint_opposition_rate, | |
| CASE | |
| WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.80 THEN 'STRONG_COALITION' | |
| WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.60 THEN 'MODERATE_COALITION' | |
| WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.40 THEN 'WEAK_ALIGNMENT' | |
| ELSE 'OPPOSITION' | |
| END AS coalition_strength, | |
| CASE | |
| WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.80 THEN 'Strong coalition partnership - consistent aligned voting' | |
| WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) >= 0.60 THEN 'Moderate coalition alignment - generally cooperative' | |
| WHEN (aligned_votes::numeric / NULLIF(total_votes, 0)) <= 0.40 THEN 'Opposition positioning - frequently divergent votes' | |
| ELSE 'Neutral relationship - mixed voting patterns' | |
| END AS coalition_assessment | |
| FROM alignment_metrics | |
| WHERE total_votes >= 10 | |
| ORDER BY alignment_percentage DESC, total_votes DESC; | |
| -- =========================================================================== | |
| -- SECTION 7: Index & Statistics Analysis | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== Index & Statistics Analysis ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| \echo '--- Current Indexes on vote_data ---' | |
| SELECT | |
| indexname, | |
| indexdef | |
| FROM pg_indexes | |
| WHERE schemaname = 'public' | |
| AND tablename = 'vote_data' | |
| ORDER BY indexname; | |
| \echo '' | |
| \echo '--- Index Usage Statistics ---' | |
| SELECT | |
| schemaname, | |
| tablename, | |
| indexname, | |
| idx_scan AS index_scans, | |
| idx_tup_read AS tuples_read, | |
| idx_tup_fetch AS tuples_fetched, | |
| pg_size_pretty(pg_relation_size(indexrelid)) AS index_size | |
| FROM pg_stat_user_indexes | |
| WHERE schemaname = 'public' | |
| AND tablename = 'vote_data' | |
| ORDER BY idx_scan DESC; | |
| \echo '' | |
| \echo '--- Table Statistics ---' | |
| SELECT | |
| relname AS table_name, | |
| n_live_tup AS row_count, | |
| n_dead_tup AS dead_rows, | |
| pg_size_pretty(pg_total_relation_size(relid)) AS total_size, | |
| pg_size_pretty(pg_relation_size(relid)) AS table_size, | |
| pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size, | |
| last_vacuum, | |
| last_autovacuum, | |
| last_analyze, | |
| last_autoanalyze | |
| FROM pg_stat_user_tables | |
| WHERE schemaname = 'public' | |
| AND relname = 'vote_data'; | |
| \echo '' | |
| \echo '--- Column Statistics (for optimizer) ---' | |
| SELECT | |
| schemaname, | |
| tablename, | |
| attname AS column_name, | |
| n_distinct, | |
| correlation, | |
| most_common_vals, | |
| most_common_freqs | |
| FROM pg_stats | |
| WHERE schemaname = 'public' | |
| AND tablename = 'vote_data' | |
| AND attname IN ('party', 'vote', 'vote_date', 'embedded_id_ballot_id') | |
| ORDER BY attname; | |
| -- =========================================================================== | |
| -- SECTION 8: Recommendations | |
| -- =========================================================================== | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== Optimization Recommendations ===' | |
| \echo '==========================================================================' | |
| \echo '' | |
| \echo 'Based on the analysis above, consider:' | |
| \echo '' | |
| \echo '1. Missing Indexes (if sequential scans detected):' | |
| \echo ' CREATE INDEX idx_vote_data_coalition_filter' | |
| \echo ' ON vote_data (vote_date, party, vote)' | |
| \echo ' WHERE vote_date >= (CURRENT_DATE - INTERVAL ''5 years'')' | |
| \echo ' AND party IS NOT NULL AND vote IS NOT NULL;' | |
| \echo '' | |
| \echo ' CREATE INDEX idx_vote_data_party_ballot' | |
| \echo ' ON vote_data (party, embedded_id_ballot_id, vote);' | |
| \echo '' | |
| \echo '2. Materialized View (if query runs frequently):' | |
| \echo ' CREATE MATERIALIZED VIEW mv_coalition_alignment_matrix AS' | |
| \echo ' SELECT * FROM view_riksdagen_coalition_alignment_matrix;' | |
| \echo '' | |
| \echo '3. Statistics Update (if estimates are wrong):' | |
| \echo ' ALTER TABLE vote_data ALTER COLUMN party SET STATISTICS 1000;' | |
| \echo ' ALTER TABLE vote_data ALTER COLUMN embedded_id_ballot_id SET STATISTICS 1000;' | |
| \echo ' ANALYZE vote_data;' | |
| \echo '' | |
| \echo '4. Query Tuning:' | |
| \echo ' - Consider work_mem increase for hash aggregations' | |
| \echo ' - Evaluate if DISTINCT can be replaced with GROUP BY' | |
| \echo ' - Test if pre-filtering party_pairs improves performance' | |
| \echo '' | |
| \echo '==========================================================================' | |
| \echo '=== Analysis Complete ===' | |
| \echo '==========================================================================' | |